Spaces:
Sleeping
Sleeping
testing minimum version
Browse files- app.py +366 -243
- requirements.txt +2 -6
- tools/__init__.py +0 -0
- tools/annotation.py +0 -107
- tools/face_detection.py +0 -481
- tools/face_recognition.py +0 -114
- tools/gallery.py +0 -37
- tools/nametypes.py +0 -33
- tools/pca.py +0 -59
- tools/utils.py +0 -164
app.py
CHANGED
|
@@ -2,291 +2,414 @@ import streamlit as st
|
|
| 2 |
import time
|
| 3 |
from typing import List
|
| 4 |
from streamlit_webrtc import webrtc_streamer, WebRtcMode
|
| 5 |
-
import logging
|
| 6 |
import av
|
| 7 |
-
import
|
| 8 |
-
|
| 9 |
-
import
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
from
|
| 13 |
-
|
| 14 |
-
from
|
| 15 |
-
from
|
| 16 |
-
from
|
| 17 |
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
# Set page layout for streamlit to wide
|
| 25 |
-
st.set_page_config(layout="wide", page_title="
|
| 26 |
-
with st.sidebar:
|
| 27 |
-
st.markdown("# Settings")
|
| 28 |
-
face_rec_on = st_toggle_switch(
|
| 29 |
-
"Live Face Recognition",
|
| 30 |
-
key="activate_face_rec",
|
| 31 |
-
default_value=True,
|
| 32 |
-
active_color=rgb(255, 75, 75),
|
| 33 |
-
track_color=rgb(50, 50, 50),
|
| 34 |
-
label_after=True,
|
| 35 |
-
)
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
resolution = st.selectbox(
|
| 40 |
-
"Webcam Resolution",
|
| 41 |
-
[(1920, 1080), (1280, 720), (640, 360)],
|
| 42 |
-
index=2,
|
| 43 |
-
)
|
| 44 |
-
st.markdown("Note: To change the resolution, you have to restart the stream.")
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
)
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
stats = Stats()
|
| 105 |
|
| 106 |
-
# Start timer for FPS calculation
|
| 107 |
-
frame_start = time.time()
|
| 108 |
|
|
|
|
| 109 |
# Convert frame to numpy array
|
| 110 |
frame = frame.to_ndarray(format="rgb24")
|
| 111 |
|
| 112 |
-
#
|
| 113 |
-
|
| 114 |
-
stats = stats._replace(resolution=resolution)
|
| 115 |
-
|
| 116 |
-
if face_rec_on:
|
| 117 |
-
# Run face detection
|
| 118 |
-
start = time.time()
|
| 119 |
-
frame, detections = face_detector(frame)
|
| 120 |
-
stats = stats._replace(num_faces=len(detections) if detections else 0)
|
| 121 |
-
stats = stats._replace(detection=(time.time() - start) * 1000)
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
identities = face_recognizer(frame, detections)
|
| 126 |
-
stats = stats._replace(recognition=(time.time() - start) * 1000)
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
matches = face_recognizer.find_matches(identities, gallery)
|
| 131 |
-
stats = stats._replace(matching=(time.time() - start) * 1000)
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
frame = annotator(frame, detections, identities, matches, gallery)
|
| 136 |
-
stats = stats._replace(annotation=(time.time() - start) * 1000)
|
| 137 |
|
| 138 |
# Convert frame back to av.VideoFrame
|
| 139 |
frame = av.VideoFrame.from_ndarray(frame, format="rgb24")
|
| 140 |
|
| 141 |
-
|
| 142 |
-
stats = stats._replace(fps=1 / (time.time() - frame_start))
|
| 143 |
|
| 144 |
-
# Send data to other thread
|
| 145 |
-
transfer_queue.put_nowait([stats, detections, identities, matches])
|
| 146 |
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
#
|
| 151 |
-
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
ctx = webrtc_streamer(
|
| 157 |
-
key="FaceIDAppDemo",
|
| 158 |
-
mode=WebRtcMode.SENDRECV,
|
| 159 |
-
rtc_configuration={"iceServers": get_ice_servers(name=ice_server)},
|
| 160 |
-
video_frame_callback=video_frame_callback,
|
| 161 |
-
media_stream_constraints={
|
| 162 |
-
"video": {
|
| 163 |
-
"width": {
|
| 164 |
-
"min": resolution[0],
|
| 165 |
-
"ideal": resolution[0],
|
| 166 |
-
"max": resolution[0],
|
| 167 |
-
},
|
| 168 |
-
"height": {
|
| 169 |
-
"min": resolution[1],
|
| 170 |
-
"ideal": resolution[1],
|
| 171 |
-
"max": resolution[1],
|
| 172 |
-
},
|
| 173 |
-
},
|
| 174 |
-
"audio": False,
|
| 175 |
-
},
|
| 176 |
-
async_processing=True,
|
| 177 |
-
)
|
| 178 |
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
with tab_metrics:
|
| 191 |
-
# Display Detections and Identities
|
| 192 |
-
st.markdown("**Detection Metrics**")
|
| 193 |
-
disp_detection_metrics = st.info("No detected faces yet ...")
|
| 194 |
-
|
| 195 |
-
# Display Recognition Metrics
|
| 196 |
-
st.markdown("**Recognition Metrics**")
|
| 197 |
-
disp_recognition_metrics = st.info("No recognized identities yet ...")
|
| 198 |
-
|
| 199 |
-
with tab_pca:
|
| 200 |
-
# Display 2D and 3D PCA
|
| 201 |
-
col1, col2 = st.columns(2)
|
| 202 |
-
col1.markdown("**PCA 2D**")
|
| 203 |
-
disp_pca3d = col1.info("Only available if more than 1 recognized face ...")
|
| 204 |
-
col2.markdown("**PCA 3D**")
|
| 205 |
-
disp_pca2d = col2.info("Only available if more than 1 recognized face ...")
|
| 206 |
-
freeze_pcas = st.button("Freeze PCAs for Interaction", key="reset_pca")
|
| 207 |
-
|
| 208 |
-
# Show PCAs
|
| 209 |
-
if freeze_pcas and gallery:
|
| 210 |
-
col1, col2 = st.columns(2)
|
| 211 |
-
if len(st.session_state.matches) > 1:
|
| 212 |
-
col1.plotly_chart(
|
| 213 |
-
pca(
|
| 214 |
-
st.session_state.matches,
|
| 215 |
-
st.session_state.identities,
|
| 216 |
-
gallery,
|
| 217 |
-
dim=3,
|
| 218 |
-
),
|
| 219 |
-
use_container_width=True,
|
| 220 |
-
)
|
| 221 |
-
col2.plotly_chart(
|
| 222 |
-
pca(
|
| 223 |
-
st.session_state.matches,
|
| 224 |
-
st.session_state.identities,
|
| 225 |
-
gallery,
|
| 226 |
-
dim=2,
|
| 227 |
-
),
|
| 228 |
-
use_container_width=True,
|
| 229 |
)
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
-
#
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
|
|
|
|
|
|
| 237 |
)
|
| 238 |
-
else:
|
| 239 |
-
disp_identities_gal.info("No gallery images uploaded yet ...")
|
| 240 |
|
|
|
|
|
|
|
| 241 |
|
| 242 |
-
# Display Live Stats
|
| 243 |
if ctx.state.playing:
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
# Save for PCA Snapshot
|
| 249 |
-
st.session_state.identities = identities
|
| 250 |
-
st.session_state.matches = matches
|
| 251 |
-
|
| 252 |
-
# Show Stats
|
| 253 |
-
disp_stats.dataframe(
|
| 254 |
-
pd.DataFrame([stats]).applymap(lambda x: (format_dflist(x))),
|
| 255 |
-
use_container_width=True,
|
| 256 |
-
)
|
| 257 |
-
|
| 258 |
-
# Show Detections Metrics
|
| 259 |
-
if detections:
|
| 260 |
-
disp_detection_metrics.dataframe(
|
| 261 |
-
pd.DataFrame(detections).applymap(lambda x: (format_dflist(x))),
|
| 262 |
-
use_container_width=True,
|
| 263 |
-
)
|
| 264 |
-
else:
|
| 265 |
-
disp_detection_metrics.info("No detected faces yet ...")
|
| 266 |
-
|
| 267 |
-
# Show Match Metrics
|
| 268 |
-
if matches:
|
| 269 |
-
disp_recognition_metrics.dataframe(
|
| 270 |
-
pd.DataFrame(matches).applymap(lambda x: (format_dflist(x))),
|
| 271 |
-
use_container_width=True,
|
| 272 |
-
)
|
| 273 |
-
else:
|
| 274 |
-
disp_recognition_metrics.info("No recognized identities yet ...")
|
| 275 |
-
|
| 276 |
-
if len(matches) > 1:
|
| 277 |
-
disp_pca3d.plotly_chart(pca(matches, identities, gallery, dim=3), use_container_width=True)
|
| 278 |
-
disp_pca2d.plotly_chart(pca(matches, identities, gallery, dim=2), use_container_width=True)
|
| 279 |
-
else:
|
| 280 |
-
disp_pca3d.info("Only available if more than 1 recognized face ...")
|
| 281 |
-
disp_pca2d.info("Only available if more than 1 recognized face ...")
|
| 282 |
-
|
| 283 |
-
# Show Recognized Identities
|
| 284 |
-
if matches:
|
| 285 |
-
disp_identities_rec.image(
|
| 286 |
-
image=[identities[match.identity_idx].face_aligned for match in matches],
|
| 287 |
-
caption=[gallery[match.gallery_idx].name for match in matches],
|
| 288 |
-
)
|
| 289 |
-
else:
|
| 290 |
-
disp_identities_rec.info("No recognized identities yet ...")
|
| 291 |
|
| 292 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import time
|
| 3 |
from typing import List
|
| 4 |
from streamlit_webrtc import webrtc_streamer, WebRtcMode
|
|
|
|
| 5 |
import av
|
| 6 |
+
import numpy as np
|
| 7 |
+
import onnxruntime as rt
|
| 8 |
+
import threading
|
| 9 |
+
import mediapipe as mp
|
| 10 |
+
import os
|
| 11 |
+
from twilio.rest import Client
|
| 12 |
+
import cv2
|
| 13 |
+
from skimage.transform import SimilarityTransform
|
| 14 |
+
from types import SimpleNamespace
|
| 15 |
+
from sklearn.metrics.pairwise import cosine_distances
|
| 16 |
|
| 17 |
|
| 18 |
+
class Detection(SimpleNamespace):
|
| 19 |
+
bbox: List[List[float]] = None
|
| 20 |
+
landmarks: List[List[float]] = None
|
| 21 |
|
| 22 |
|
| 23 |
+
class Identity(SimpleNamespace):
|
| 24 |
+
detection: Detection = Detection()
|
| 25 |
+
name: str = None
|
| 26 |
+
embedding: np.ndarray = None
|
| 27 |
+
face: np.ndarray = None
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class Match(SimpleNamespace):
|
| 31 |
+
subject_id: Identity = Identity()
|
| 32 |
+
gallery_id: Identity = Identity()
|
| 33 |
+
distance: float = None
|
| 34 |
+
name: str = None
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class Grabber(object):
|
| 38 |
+
def __init__(self, video_receiver) -> None:
|
| 39 |
+
self.currentFrame = None
|
| 40 |
+
self.capture = video_receiver
|
| 41 |
+
self.thread = threading.Thread(target=self.update_frame)
|
| 42 |
+
self.thread.daemon = True
|
| 43 |
+
|
| 44 |
+
def update_frame(self) -> None:
|
| 45 |
+
while True:
|
| 46 |
+
self.currentFrame = self.capture.get_frame()
|
| 47 |
+
|
| 48 |
+
def get_frame(self) -> av.VideoFrame:
|
| 49 |
+
return self.currentFrame
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Similarity threshold for face matching
|
| 53 |
+
SIMILARITY_THRESHOLD = 1.2
|
| 54 |
+
|
| 55 |
+
# Get twilio ice server configuration using twilio credentials from environment variables (set in streamlit secrets)
|
| 56 |
+
# Ref: https://www.twilio.com/docs/stun-turn/api
|
| 57 |
+
ICE_SERVERS = Client(os.environ["TWILIO_ACCOUNT_SID"], os.environ["TWILIO_AUTH_TOKEN"]).tokens.create().ice_servers
|
| 58 |
+
|
| 59 |
# Set page layout for streamlit to wide
|
| 60 |
+
st.set_page_config(layout="wide", page_title="Live Face Recognition", page_icon=":sunglasses:")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
+
# Streamlit app
|
| 63 |
+
st.title("Live Webcam Face Recognition")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
+
st.markdown("**Live Stream**")
|
| 66 |
+
ctx_container = st.container()
|
| 67 |
+
stream_container = st.empty()
|
| 68 |
+
|
| 69 |
+
st.markdown("**Matches**")
|
| 70 |
+
matches_container = st.info("No matches found yet ...")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
# Init face detector and face recognizer
|
| 74 |
+
face_recognizer = rt.InferenceSession("model.fixed.onnx", providers=rt.get_available_providers())
|
| 75 |
+
face_detector = mp.solutions.face_mesh.FaceMesh(
|
| 76 |
+
refine_landmarks=True,
|
| 77 |
+
min_detection_confidence=0.5,
|
| 78 |
+
min_tracking_confidence=0.5,
|
| 79 |
+
max_num_faces=5,
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def detect_faces(frame: np.ndarray) -> List[Detection]:
|
| 84 |
+
# Process the frame with the face detector
|
| 85 |
+
result = face_detector.process(frame)
|
| 86 |
+
|
| 87 |
+
# Initialize an empty list to store the detected faces
|
| 88 |
+
detections = []
|
| 89 |
+
|
| 90 |
+
# Check if any faces were detected
|
| 91 |
+
if result.multi_face_landmarks:
|
| 92 |
+
# Iterate over each detected face
|
| 93 |
+
for count, detection in enumerate(result.multi_face_landmarks):
|
| 94 |
+
# Select 5 Landmarks
|
| 95 |
+
five_landmarks = np.asarray(detection.landmark)[[470, 475, 1, 57, 287]]
|
| 96 |
+
|
| 97 |
+
# Extract the x and y coordinates of the landmarks of interest
|
| 98 |
+
landmarks = [[landmark.x * frame.shape[1], landmark.y * frame.shape[0]] for landmark in five_landmarks]
|
| 99 |
+
|
| 100 |
+
# Extract the x and y coordinates of all landmarks
|
| 101 |
+
all_x_coords = [landmark.x * frame.shape[1] for landmark in detection.landmark]
|
| 102 |
+
all_y_coords = [landmark.y * frame.shape[0] for landmark in detection.landmark]
|
| 103 |
+
|
| 104 |
+
# Compute the bounding box of the face
|
| 105 |
+
x_min, x_max = int(min(all_x_coords)), int(max(all_x_coords))
|
| 106 |
+
y_min, y_max = int(min(all_y_coords)), int(max(all_y_coords))
|
| 107 |
+
bbox = [[x_min, y_min], [x_max, y_max]]
|
| 108 |
+
|
| 109 |
+
# Create a Detection object for the face
|
| 110 |
+
detection = Detection(
|
| 111 |
+
idx=count,
|
| 112 |
+
bbox=bbox,
|
| 113 |
+
landmarks=landmarks,
|
| 114 |
+
confidence=None,
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
# Add the detection to the list
|
| 118 |
+
detections.append(detection)
|
| 119 |
+
|
| 120 |
+
# Return the list of detections
|
| 121 |
+
return detections
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def recognize_faces(frame: np.ndarray, detections: List[Detection]) -> List[Identity]:
|
| 125 |
+
if not detections:
|
| 126 |
+
return []
|
| 127 |
+
|
| 128 |
+
identities = []
|
| 129 |
+
for detection in detections:
|
| 130 |
+
# ALIGNMENT -----------------------------------------------------------
|
| 131 |
+
# Target landmark coordinates (as used in training)
|
| 132 |
+
landmarks_target = np.array(
|
| 133 |
+
[
|
| 134 |
+
[38.2946, 51.6963],
|
| 135 |
+
[73.5318, 51.5014],
|
| 136 |
+
[56.0252, 71.7366],
|
| 137 |
+
[41.5493, 92.3655],
|
| 138 |
+
[70.7299, 92.2041],
|
| 139 |
+
],
|
| 140 |
+
dtype=np.float32,
|
| 141 |
)
|
| 142 |
+
tform = SimilarityTransform()
|
| 143 |
+
tform.estimate(detection.landmarks, landmarks_target)
|
| 144 |
+
tmatrix = tform.params[0:2, :]
|
| 145 |
+
face_aligned = cv2.warpAffine(frame, tmatrix, (112, 112), borderValue=0.0)
|
| 146 |
+
# ---------------------------------------------------------------------
|
| 147 |
+
|
| 148 |
+
# INFERENCE -----------------------------------------------------------
|
| 149 |
+
# Inference face embeddings with onnxruntime
|
| 150 |
+
input_image = (np.asarray([face_aligned]).astype(np.float32) / 255.0).clip(0.0, 1.0)
|
| 151 |
+
embedding = face_recognizer.run(None, {"input_image": input_image})[0][0]
|
| 152 |
+
# ---------------------------------------------------------------------
|
| 153 |
+
|
| 154 |
+
# Create Identity object
|
| 155 |
+
identities.append(Identity(detection=detection, embedding=embedding, face=face_aligned))
|
| 156 |
+
|
| 157 |
+
return identities
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def match_faces(subjects: List[Identity], gallery: List[Identity]) -> List[Match]:
|
| 161 |
+
if len(gallery) == 0 or len(subjects) == 0:
|
| 162 |
+
return []
|
| 163 |
+
|
| 164 |
+
# Get Embeddings
|
| 165 |
+
embs_gal = np.asarray([identity.embedding for identity in gallery])
|
| 166 |
+
embs_det = np.asarray([identity.embedding for identity in subjects])
|
| 167 |
+
|
| 168 |
+
# Calculate Cosine Distances
|
| 169 |
+
cos_distances = cosine_distances(embs_det, embs_gal)
|
| 170 |
+
|
| 171 |
+
# Find Matches
|
| 172 |
+
matches = []
|
| 173 |
+
for ident_idx, identity in enumerate(subjects):
|
| 174 |
+
dists_to_identity = cos_distances[ident_idx]
|
| 175 |
+
idx_min = np.argmin(dists_to_identity)
|
| 176 |
+
if dists_to_identity[idx_min] < SIMILARITY_THRESHOLD:
|
| 177 |
+
matches.append(
|
| 178 |
+
Match(
|
| 179 |
+
subject_id=identity,
|
| 180 |
+
gallery_id=gallery[idx_min],
|
| 181 |
+
distance=dists_to_identity[idx_min],
|
| 182 |
+
)
|
| 183 |
+
)
|
| 184 |
|
| 185 |
+
# Sort Matches by identity_idx
|
| 186 |
+
matches = sorted(matches, key=lambda match: match.gallery_id.name)
|
| 187 |
+
|
| 188 |
+
return matches
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def draw_annotations(frame: np.ndarray, detections: List[Detection], matches: List[Match]) -> np.ndarray:
|
| 192 |
+
global timestamp
|
| 193 |
+
shape = np.asarray(frame.shape[:2][::-1])
|
| 194 |
+
|
| 195 |
+
# Upscale frame to 1080p for better visualization of drawn annotations
|
| 196 |
+
frame = cv2.resize(frame, (1920, 1080))
|
| 197 |
+
upscale_factor = np.asarray([1920 / shape[0], 1080 / shape[1]])
|
| 198 |
+
shape = np.asarray(frame.shape[:2][::-1])
|
| 199 |
+
|
| 200 |
+
# Make frame writeable (for better performance)
|
| 201 |
+
frame.flags.writeable = True
|
| 202 |
+
|
| 203 |
+
fps = 1 / (time.time() - timestamp)
|
| 204 |
+
timestamp = time.time()
|
| 205 |
+
|
| 206 |
+
# Draw FPS
|
| 207 |
+
cv2.putText(
|
| 208 |
+
frame,
|
| 209 |
+
f"FPS: {fps:.1f}",
|
| 210 |
+
(20, 40),
|
| 211 |
+
cv2.FONT_HERSHEY_SIMPLEX,
|
| 212 |
+
1,
|
| 213 |
+
(0, 255, 0),
|
| 214 |
+
2,
|
| 215 |
)
|
| 216 |
|
| 217 |
+
# Draw Detections
|
| 218 |
+
for detection in detections:
|
| 219 |
+
# Draw Landmarks
|
| 220 |
+
for landmark in detection.landmarks:
|
| 221 |
+
cv2.circle(
|
| 222 |
+
frame,
|
| 223 |
+
(landmark * upscale_factor).astype(int),
|
| 224 |
+
2,
|
| 225 |
+
(255, 255, 255),
|
| 226 |
+
-1,
|
| 227 |
+
)
|
| 228 |
|
| 229 |
+
# Draw Bounding Box
|
| 230 |
+
cv2.rectangle(
|
| 231 |
+
frame,
|
| 232 |
+
(detection.bbox[0] * upscale_factor).astype(int),
|
| 233 |
+
(detection.bbox[1] * upscale_factor).astype(int),
|
| 234 |
+
(255, 0, 0),
|
| 235 |
+
2,
|
| 236 |
+
)
|
| 237 |
|
| 238 |
+
# Draw Index
|
| 239 |
+
cv2.putText(
|
| 240 |
+
frame,
|
| 241 |
+
str(detection.idx),
|
| 242 |
+
(
|
| 243 |
+
((detection.bbox[1][0] + 2) * upscale_factor[0]).astype(int),
|
| 244 |
+
((detection.bbox[1][1] + 2) * upscale_factor[1]).astype(int),
|
| 245 |
+
),
|
| 246 |
+
cv2.LINE_AA,
|
| 247 |
+
0.5,
|
| 248 |
+
(0, 0, 0),
|
| 249 |
+
2,
|
| 250 |
+
)
|
| 251 |
|
| 252 |
+
# Draw Matches
|
| 253 |
+
for match in matches:
|
| 254 |
+
detection = match.subject_id.detection
|
| 255 |
+
name = match.gallery_id.name
|
| 256 |
+
|
| 257 |
+
# Draw Bounding Box in green
|
| 258 |
+
cv2.rectangle(
|
| 259 |
+
frame,
|
| 260 |
+
(detection.bbox[0] * upscale_factor).astype(int),
|
| 261 |
+
(detection.bbox[1] * upscale_factor).astype(int),
|
| 262 |
+
(0, 255, 0),
|
| 263 |
+
2,
|
| 264 |
+
)
|
| 265 |
|
| 266 |
+
# Draw Banner
|
| 267 |
+
cv2.rectangle(
|
| 268 |
+
frame,
|
| 269 |
+
(
|
| 270 |
+
(detection.bbox[0][0] * upscale_factor[0]).astype(int),
|
| 271 |
+
(detection.bbox[0][1] * upscale_factor[1] - (shape[1] // 25)).astype(int),
|
| 272 |
+
),
|
| 273 |
+
(
|
| 274 |
+
(detection.bbox[1][0] * upscale_factor[0]).astype(int),
|
| 275 |
+
(detection.bbox[0][1] * upscale_factor[1]).astype(int),
|
| 276 |
+
),
|
| 277 |
+
(255, 255, 255),
|
| 278 |
+
-1,
|
| 279 |
+
)
|
| 280 |
|
| 281 |
+
# Draw Name
|
| 282 |
+
cv2.putText(
|
| 283 |
+
frame,
|
| 284 |
+
name,
|
| 285 |
+
(
|
| 286 |
+
((detection.bbox[0][0] + shape[0] // 400) * upscale_factor[0]).astype(int),
|
| 287 |
+
((detection.bbox[0][1] - shape[1] // 50) * upscale_factor[1]).astype(int),
|
| 288 |
+
),
|
| 289 |
+
cv2.LINE_AA,
|
| 290 |
+
0.7,
|
| 291 |
+
(0, 0, 0),
|
| 292 |
+
2,
|
| 293 |
+
)
|
| 294 |
|
| 295 |
+
# Draw Distance
|
| 296 |
+
cv2.putText(
|
| 297 |
+
frame,
|
| 298 |
+
f" Distance: {match.distance:.2f}",
|
| 299 |
+
(
|
| 300 |
+
((detection.bbox[0][0] + shape[0] // 400) * upscale_factor[0]).astype(int),
|
| 301 |
+
((detection.bbox[0][1] - shape[1] // 350) * upscale_factor[1]).astype(int),
|
| 302 |
+
),
|
| 303 |
+
cv2.LINE_AA,
|
| 304 |
+
0.5,
|
| 305 |
+
(0, 0, 0),
|
| 306 |
+
2,
|
| 307 |
+
)
|
| 308 |
|
| 309 |
+
return frame
|
|
|
|
| 310 |
|
|
|
|
|
|
|
| 311 |
|
| 312 |
+
def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
|
| 313 |
# Convert frame to numpy array
|
| 314 |
frame = frame.to_ndarray(format="rgb24")
|
| 315 |
|
| 316 |
+
# Run face detection
|
| 317 |
+
detections = detect_faces(frame)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
+
# Run face recognition
|
| 320 |
+
subjects = recognize_faces(frame, detections)
|
|
|
|
|
|
|
| 321 |
|
| 322 |
+
# Run face matching
|
| 323 |
+
matches = match_faces(subjects, gallery)
|
|
|
|
|
|
|
| 324 |
|
| 325 |
+
# Draw annotations
|
| 326 |
+
frame = draw_annotations(frame, detections, matches)
|
|
|
|
|
|
|
| 327 |
|
| 328 |
# Convert frame back to av.VideoFrame
|
| 329 |
frame = av.VideoFrame.from_ndarray(frame, format="rgb24")
|
| 330 |
|
| 331 |
+
return frame, matches
|
|
|
|
| 332 |
|
|
|
|
|
|
|
| 333 |
|
| 334 |
+
# Sidebar for face gallery
|
| 335 |
+
with st.sidebar:
|
| 336 |
+
st.markdown("# Face Gallery")
|
| 337 |
+
files = st.sidebar.file_uploader(
|
| 338 |
+
"Upload images to gallery",
|
| 339 |
+
type=["png", "jpg", "jpeg"],
|
| 340 |
+
accept_multiple_files=True,
|
| 341 |
+
label_visibility="collapsed",
|
| 342 |
+
)
|
| 343 |
|
| 344 |
+
# Init gallery
|
| 345 |
+
gallery = []
|
| 346 |
+
for file in files:
|
| 347 |
+
# Read file bytes
|
| 348 |
+
file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
|
| 349 |
|
| 350 |
+
# Decode image and convert from BGR to RGB
|
| 351 |
+
img = cv2.cvtColor(cv2.imdecode(file_bytes, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
|
| 352 |
|
| 353 |
+
# Detect faces
|
| 354 |
+
detections = detect_faces(img)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
+
if detections:
|
| 357 |
+
# recognize faces
|
| 358 |
+
subjects = recognize_faces(img, detections[:1])
|
| 359 |
+
|
| 360 |
+
# Add subjects to gallery
|
| 361 |
+
gallery.append(
|
| 362 |
+
Identity(
|
| 363 |
+
name=os.path.splitext(file.name)[0],
|
| 364 |
+
embedding=subjects[0].embedding,
|
| 365 |
+
face=subjects[0].face,
|
| 366 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
)
|
| 368 |
|
| 369 |
+
# Show gallery images
|
| 370 |
+
st.image(
|
| 371 |
+
image=[identity.face for identity in gallery],
|
| 372 |
+
caption=[identity.name for identity in gallery],
|
| 373 |
+
)
|
| 374 |
|
| 375 |
+
# Start streaming component
|
| 376 |
+
with ctx_container:
|
| 377 |
+
ctx = webrtc_streamer(
|
| 378 |
+
key="LiveFaceRecognition",
|
| 379 |
+
mode=WebRtcMode.SENDONLY,
|
| 380 |
+
rtc_configuration={"iceServers": ICE_SERVERS},
|
| 381 |
+
media_stream_constraints={"video": {"width": 1920}, "audio": False},
|
| 382 |
)
|
|
|
|
|
|
|
| 383 |
|
| 384 |
+
# Initialize frame grabber
|
| 385 |
+
grabber = Grabber(ctx.video_receiver)
|
| 386 |
|
|
|
|
| 387 |
if ctx.state.playing:
|
| 388 |
+
# Start frame grabber in background thread
|
| 389 |
+
grabber.thread.start()
|
| 390 |
+
timestamp = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
|
| 392 |
+
# Start main loop
|
| 393 |
+
while True:
|
| 394 |
+
frame = grabber.get_frame()
|
| 395 |
+
if frame is not None:
|
| 396 |
+
# Print frame timestamp to streamlit
|
| 397 |
+
st.write(f"Frame timestamp: {frame.time}")
|
| 398 |
+
|
| 399 |
+
# Run face detection and recognition
|
| 400 |
+
frame, matches = video_frame_callback(frame)
|
| 401 |
+
|
| 402 |
+
# Convert frame to numpy array
|
| 403 |
+
frame = frame.to_ndarray(format="rgb24")
|
| 404 |
+
|
| 405 |
+
# Show Stream
|
| 406 |
+
stream_container.image(frame, channels="RGB")
|
| 407 |
+
|
| 408 |
+
# Show Matches
|
| 409 |
+
if matches:
|
| 410 |
+
matches_container.image(
|
| 411 |
+
image=[match.subject_id.face for match in matches],
|
| 412 |
+
caption=[match.gallery_id.name for match in matches],
|
| 413 |
+
)
|
| 414 |
+
else:
|
| 415 |
+
matches_container.info("No matches found yet ...")
|
requirements.txt
CHANGED
|
@@ -1,13 +1,9 @@
|
|
| 1 |
streamlit
|
| 2 |
scikit-image
|
| 3 |
scikit-learn
|
| 4 |
-
mediapipe
|
| 5 |
opencv-python-headless
|
| 6 |
watchdog
|
| 7 |
streamlit-webrtc
|
| 8 |
-
matplotlib
|
| 9 |
-
streamlit-toggle-switch
|
| 10 |
-
tflite-runtime
|
| 11 |
twilio
|
| 12 |
-
|
| 13 |
-
|
|
|
|
| 1 |
streamlit
|
| 2 |
scikit-image
|
| 3 |
scikit-learn
|
|
|
|
| 4 |
opencv-python-headless
|
| 5 |
watchdog
|
| 6 |
streamlit-webrtc
|
|
|
|
|
|
|
|
|
|
| 7 |
twilio
|
| 8 |
+
onnxruntime
|
| 9 |
+
mediapipe
|
tools/__init__.py
DELETED
|
File without changes
|
tools/annotation.py
DELETED
|
@@ -1,107 +0,0 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
import cv2
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
class Annotation:
|
| 6 |
-
def __init__(self, draw_bbox=True, draw_landmarks=True, draw_name=True, upscale=True):
|
| 7 |
-
self.bbox = draw_bbox
|
| 8 |
-
self.landmarks = draw_landmarks
|
| 9 |
-
self.name = draw_name
|
| 10 |
-
self.upscale = upscale
|
| 11 |
-
|
| 12 |
-
def __call__(self, frame, detections, identities, matches, gallery):
|
| 13 |
-
shape = np.asarray(frame.shape[:2][::-1])
|
| 14 |
-
if self.upscale:
|
| 15 |
-
frame = cv2.resize(frame, (1920, 1080))
|
| 16 |
-
upscale_factor = np.asarray([1920 / shape[0], 1080 / shape[1]])
|
| 17 |
-
shape = np.asarray(frame.shape[:2][::-1])
|
| 18 |
-
else:
|
| 19 |
-
upscale_factor = np.asarray([1, 1])
|
| 20 |
-
|
| 21 |
-
frame.flags.writeable = True
|
| 22 |
-
|
| 23 |
-
for detection in detections:
|
| 24 |
-
# Draw Landmarks
|
| 25 |
-
if self.landmarks:
|
| 26 |
-
for landmark in detection.landmarks:
|
| 27 |
-
cv2.circle(
|
| 28 |
-
frame,
|
| 29 |
-
(landmark * upscale_factor).astype(int),
|
| 30 |
-
2,
|
| 31 |
-
(255, 255, 255),
|
| 32 |
-
-1,
|
| 33 |
-
)
|
| 34 |
-
|
| 35 |
-
# Draw Bounding Box
|
| 36 |
-
if self.bbox:
|
| 37 |
-
cv2.rectangle(
|
| 38 |
-
frame,
|
| 39 |
-
(detection.bbox[0] * upscale_factor).astype(int),
|
| 40 |
-
(detection.bbox[1] * upscale_factor).astype(int),
|
| 41 |
-
(255, 0, 0),
|
| 42 |
-
2,
|
| 43 |
-
)
|
| 44 |
-
|
| 45 |
-
# Draw Index
|
| 46 |
-
cv2.putText(
|
| 47 |
-
frame,
|
| 48 |
-
str(detection.idx),
|
| 49 |
-
(
|
| 50 |
-
((detection.bbox[1][0] + 2) * upscale_factor[0]).astype(int),
|
| 51 |
-
((detection.bbox[1][1] + 2) * upscale_factor[1]).astype(int),
|
| 52 |
-
),
|
| 53 |
-
cv2.LINE_AA,
|
| 54 |
-
0.5,
|
| 55 |
-
(0, 0, 0),
|
| 56 |
-
2,
|
| 57 |
-
)
|
| 58 |
-
|
| 59 |
-
# Draw Name
|
| 60 |
-
if self.name:
|
| 61 |
-
for match in matches:
|
| 62 |
-
try:
|
| 63 |
-
detection = detections[identities[match.identity_idx].detection_idx]
|
| 64 |
-
except:
|
| 65 |
-
print("Identity IDX: ", match.identity_idx)
|
| 66 |
-
print("Len(Detections): ", len(detections))
|
| 67 |
-
print("Len(Identites): ", len(identities))
|
| 68 |
-
print("Detection IDX: ", identities[match.identity_idx].detection_idx)
|
| 69 |
-
|
| 70 |
-
# print("Detections: ", detections)
|
| 71 |
-
|
| 72 |
-
cv2.rectangle(
|
| 73 |
-
frame,
|
| 74 |
-
(detection.bbox[0] * upscale_factor).astype(int),
|
| 75 |
-
(detection.bbox[1] * upscale_factor).astype(int),
|
| 76 |
-
(0, 255, 0),
|
| 77 |
-
2,
|
| 78 |
-
)
|
| 79 |
-
|
| 80 |
-
cv2.rectangle(
|
| 81 |
-
frame,
|
| 82 |
-
(
|
| 83 |
-
(detection.bbox[0][0] * upscale_factor[0]).astype(int),
|
| 84 |
-
(detection.bbox[0][1] * upscale_factor[1] - (shape[1] // 25)).astype(int),
|
| 85 |
-
),
|
| 86 |
-
(
|
| 87 |
-
(detection.bbox[1][0] * upscale_factor[0]).astype(int),
|
| 88 |
-
(detection.bbox[0][1] * upscale_factor[1]).astype(int),
|
| 89 |
-
),
|
| 90 |
-
(255, 255, 255),
|
| 91 |
-
-1,
|
| 92 |
-
)
|
| 93 |
-
|
| 94 |
-
cv2.putText(
|
| 95 |
-
frame,
|
| 96 |
-
gallery[match.gallery_idx].name,
|
| 97 |
-
(
|
| 98 |
-
((detection.bbox[0][0] + shape[0] // 400) * upscale_factor[0]).astype(int),
|
| 99 |
-
((detection.bbox[0][1] - shape[1] // 100) * upscale_factor[1]).astype(int),
|
| 100 |
-
),
|
| 101 |
-
cv2.LINE_AA,
|
| 102 |
-
0.5,
|
| 103 |
-
(0, 0, 0),
|
| 104 |
-
2,
|
| 105 |
-
)
|
| 106 |
-
|
| 107 |
-
return frame
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/face_detection.py
DELETED
|
@@ -1,481 +0,0 @@
|
|
| 1 |
-
import tflite_runtime.interpreter as tflite
|
| 2 |
-
import cv2
|
| 3 |
-
import numpy as np
|
| 4 |
-
from .utils import tflite_inference
|
| 5 |
-
from .nametypes import Detection
|
| 6 |
-
from .utils import get_file
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
BASE_URL = "https://github.com/Martlgap/FaceIDLight/releases/download/v.0.1/"
|
| 10 |
-
|
| 11 |
-
FILE_HASHES = {
|
| 12 |
-
"o_net": "768385d570300648b7b881acbd418146522b79b4771029bb2e684bdd8c764b9f",
|
| 13 |
-
"p_net": "530183192e24f7cc86b6706e1eb600482c4ed4306399ac939c472e3957bae15e",
|
| 14 |
-
"r_net": "5ec33b065eb2802bc4c2575d21feff1a56958d854785bc3e2907d3b7ace861a2",
|
| 15 |
-
}
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class StageStatus:
|
| 19 |
-
"""
|
| 20 |
-
Keeps status between MTCNN stages
|
| 21 |
-
"""
|
| 22 |
-
|
| 23 |
-
def __init__(self, pad_result: tuple = None, width=0, height=0):
|
| 24 |
-
self.width = width
|
| 25 |
-
self.height = height
|
| 26 |
-
self.dy = self.edy = self.dx = self.edx = self.y = self.ey = self.x = self.ex = self.tmp_w = self.tmp_h = []
|
| 27 |
-
|
| 28 |
-
if pad_result is not None:
|
| 29 |
-
self.update(pad_result)
|
| 30 |
-
|
| 31 |
-
def update(self, pad_result: tuple):
|
| 32 |
-
s = self
|
| 33 |
-
s.dy, s.edy, s.dx, s.edx, s.y, s.ey, s.x, s.ex, s.tmp_w, s.tmp_h = pad_result
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
class FaceDetection:
    """
    Allows to perform MTCNN Detection ->
        a) Detection of faces (with the confidence probability)
        b) Detection of keypoints (left eye, right eye, nose, mouth_left, mouth_right)
    """

    def __init__(
        self,
        min_face_size: int = 40,
        steps_threshold: list = None,
        scale_factor: float = 0.7,
        min_detections_conf: float = 0.9,
    ):
        """
        Initializes the MTCNN.
        :param min_face_size: minimum size of the face to detect
        :param steps_threshold: step's thresholds values (one per cascade stage)
        :param scale_factor: scale factor between image-pyramid levels
        :param min_detections_conf: minimum confidence for a detection to be returned
        """
        if steps_threshold is None:
            steps_threshold = [0.6, 0.7, 0.7]  # original mtcnn values [0.6, 0.7, 0.7]
        self._min_face_size = min_face_size
        self._steps_threshold = steps_threshold
        self._scale_factor = scale_factor
        self.min_detections_conf = min_detections_conf
        # The three cascaded MTCNN networks, downloaded (and checksum-verified) on demand.
        self.p_net = tflite.Interpreter(model_path=get_file(BASE_URL + "p_net.tflite", FILE_HASHES["p_net"]))
        self.r_net = tflite.Interpreter(model_path=get_file(BASE_URL + "r_net.tflite", FILE_HASHES["r_net"]))
        self.o_net = tflite.Interpreter(model_path=get_file(BASE_URL + "o_net.tflite", FILE_HASHES["o_net"]))

    def __call__(self, frame):
        """
        Detects bounding boxes from the specified image.
        :param frame: image to process (H x W x 3 array)
        :return: tuple of (unchanged input frame, list of Detection namedtuples)

        From MTCNN:
        # Total boxes (bBoxes for faces)
        # 1. dim -> Number of found Faces
        # 2. dim -> x_min, y_min, x_max, y_max, score

        # Points (Landmarks left eye, right eye, nose, left mouth, right mouth)
        # 1. dim -> Number of found Faces
        # 2. dim -> x1, x2, x3, x4, x5, y2, y2, y3, y4, y5 Coordinates
        """

        height, width, _ = frame.shape
        stage_status = StageStatus(width=width, height=height)
        m = 12 / self._min_face_size  # 12 px is the smallest window the cascade scans
        min_layer = np.amin([height, width]) * m
        scales = self.__compute_scale_pyramid(m, min_layer)

        # We pipe here each of the stages
        total_boxes, stage_status = self.__stage1(frame, scales, stage_status)
        total_boxes, stage_status = self.__stage2(frame, total_boxes, stage_status)
        bboxes, points = self.__stage3(frame, total_boxes, stage_status)

        # Sort by location (to prevent flickering)
        sort_idx = np.argsort(bboxes[:, 0])
        bboxes = bboxes[sort_idx]
        points = points[sort_idx]

        # Transform to better shape and points now inside bbox
        detections = []
        cnt = 0
        for i in range(bboxes.shape[0]):
            conf = bboxes[i, -1].astype(np.float32)
            if conf > self.min_detections_conf:
                # bbox as [[x_min, y_min], [x_max, y_max]]; landmarks as five [x, y] rows.
                bboxes_c = np.reshape(bboxes[i, :-1], [2, 2]).astype(np.float32)
                points_c = np.reshape(points[i], [2, 5]).transpose().astype(np.float32)
                detections.append(
                    Detection(
                        idx=cnt,
                        bbox=list(bboxes_c),
                        landmarks=list(points_c),
                        confidence=conf,
                    )
                )
                cnt += 1
        return frame, detections

    def __compute_scale_pyramid(self, m, min_layer):
        """Return the list of image scales to scan, shrinking by _scale_factor each level."""
        scales = []
        factor_count = 0

        while min_layer >= 12:
            scales += [m * np.power(self._scale_factor, factor_count)]
            min_layer = min_layer * self._scale_factor
            factor_count += 1

        return scales

    @staticmethod
    def __scale_image(image, scale: float):
        """
        Scales the image to a given scale.
        :param image: H x W x 3 image
        :param scale: resize factor
        :return: resized image with pixels normalized to roughly [-1, 1]
        """
        height, width, _ = image.shape

        width_scaled = int(np.ceil(width * scale))
        height_scaled = int(np.ceil(height * scale))

        im_data = cv2.resize(image, (width_scaled, height_scaled), interpolation=cv2.INTER_AREA)

        # Normalize the image's pixels
        im_data_normalized = (im_data - 127.5) * 0.0078125

        return im_data_normalized

    @staticmethod
    def __generate_bounding_box(imap, reg, scale, t):
        """Turn the P-Net score heatmap into candidate boxes with scores and regression offsets."""
        # use heatmap to generate bounding boxes
        stride = 2
        cellsize = 12

        imap = np.transpose(imap)
        dx1 = np.transpose(reg[:, :, 0])
        dy1 = np.transpose(reg[:, :, 1])
        dx2 = np.transpose(reg[:, :, 2])
        dy2 = np.transpose(reg[:, :, 3])

        # Keep only heatmap cells whose score passes the stage threshold t.
        y, x = np.where(imap >= t)

        if y.shape[0] == 1:
            dx1 = np.flipud(dx1)
            dy1 = np.flipud(dy1)
            dx2 = np.flipud(dx2)
            dy2 = np.flipud(dy2)

        score = imap[(y, x)]
        reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))

        if reg.size == 0:
            reg = np.empty(shape=(0, 3))

        bb = np.transpose(np.vstack([y, x]))

        # Map heatmap cell indices back to input-image coordinates.
        q1 = np.fix((stride * bb + 1) / scale)
        q2 = np.fix((stride * bb + cellsize) / scale)
        boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])

        return boundingbox, reg

    @staticmethod
    def __nms(boxes, threshold, method):
        """
        Non Maximum Suppression.

        :param boxes: np array with bounding boxes.
        :param threshold: maximum allowed overlap between kept boxes.
        :param method: NMS method to apply. Available values ('Min', 'Union')
        :return: indices of the boxes to keep.
        """
        if boxes.size == 0:
            return np.empty((0, 3))

        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        s = boxes[:, 4]

        area = (x2 - x1 + 1) * (y2 - y1 + 1)
        sorted_s = np.argsort(s)

        pick = np.zeros_like(s, dtype=np.int16)
        counter = 0
        while sorted_s.size > 0:
            # Greedily keep the highest-scoring remaining box ...
            i = sorted_s[-1]
            pick[counter] = i
            counter += 1
            idx = sorted_s[0:-1]

            # ... and drop all others that overlap it beyond the threshold.
            xx1 = np.maximum(x1[i], x1[idx])
            yy1 = np.maximum(y1[i], y1[idx])
            xx2 = np.minimum(x2[i], x2[idx])
            yy2 = np.minimum(y2[i], y2[idx])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)

            inter = w * h

            if method == "Min":
                o = inter / np.minimum(area[i], area[idx])
            else:
                o = inter / (area[i] + area[idx] - inter)

            sorted_s = sorted_s[np.where(o <= threshold)]

        pick = pick[0:counter]

        return pick

    @staticmethod
    def __pad(total_boxes, w, h):
        """Compute source/target crop coordinates padding boxes that reach outside the image."""
        # compute the padding coordinates (pad the bounding boxes to square)
        tmp_w = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
        tmp_h = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
        numbox = total_boxes.shape[0]

        dx = np.ones(numbox, dtype=np.int32)
        dy = np.ones(numbox, dtype=np.int32)
        edx = tmp_w.copy().astype(np.int32)
        edy = tmp_h.copy().astype(np.int32)

        x = total_boxes[:, 0].copy().astype(np.int32)
        y = total_boxes[:, 1].copy().astype(np.int32)
        ex = total_boxes[:, 2].copy().astype(np.int32)
        ey = total_boxes[:, 3].copy().astype(np.int32)

        # Clip boxes at the right/bottom image borders.
        tmp = np.where(ex > w)
        edx.flat[tmp] = np.expand_dims(-ex[tmp] + w + tmp_w[tmp], 1)
        ex[tmp] = w

        tmp = np.where(ey > h)
        edy.flat[tmp] = np.expand_dims(-ey[tmp] + h + tmp_h[tmp], 1)
        ey[tmp] = h

        # Clip boxes at the left/top image borders (coordinates are 1-based here).
        tmp = np.where(x < 1)
        dx.flat[tmp] = np.expand_dims(2 - x[tmp], 1)
        x[tmp] = 1

        tmp = np.where(y < 1)
        dy.flat[tmp] = np.expand_dims(2 - y[tmp], 1)
        y[tmp] = 1

        return dy, edy, dx, edx, y, ey, x, ex, tmp_w, tmp_h

    @staticmethod
    def __rerec(bbox):
        """Expand each box to a square around its center (in place)."""
        # convert bbox to square
        height = bbox[:, 3] - bbox[:, 1]
        width = bbox[:, 2] - bbox[:, 0]
        max_side_length = np.maximum(width, height)
        bbox[:, 0] = bbox[:, 0] + width * 0.5 - max_side_length * 0.5
        bbox[:, 1] = bbox[:, 1] + height * 0.5 - max_side_length * 0.5
        bbox[:, 2:4] = bbox[:, 0:2] + np.transpose(np.tile(max_side_length, (2, 1)))
        return bbox

    @staticmethod
    def __bbreg(boundingbox, reg):
        """Apply the network's regression offsets to refine box coordinates."""
        # calibrate bounding boxes
        if reg.shape[1] == 1:
            reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

        w = boundingbox[:, 2] - boundingbox[:, 0] + 1
        h = boundingbox[:, 3] - boundingbox[:, 1] + 1
        b1 = boundingbox[:, 0] + reg[:, 0] * w
        b2 = boundingbox[:, 1] + reg[:, 1] * h
        b3 = boundingbox[:, 2] + reg[:, 2] * w
        b4 = boundingbox[:, 3] + reg[:, 3] * h
        boundingbox[:, 0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
        return boundingbox

    def __stage1(self, image, scales: list, stage_status: StageStatus):
        """
        First stage of the MTCNN: run p_net over the whole scale pyramid to propose boxes.
        :param image: input frame
        :param scales: scale pyramid from __compute_scale_pyramid
        :param stage_status: current StageStatus (carries image width/height)
        :return: (candidate boxes, updated StageStatus)
        """
        total_boxes = np.empty((0, 9))
        status = stage_status

        for scale in scales:
            scaled_image = self.__scale_image(image, scale)

            img_x = np.expand_dims(scaled_image, 0)
            img_y = np.transpose(img_x, (0, 2, 1, 3))

            out = tflite_inference(self.p_net, img_y)

            out0 = np.transpose(out[0], (0, 2, 1, 3))
            out1 = np.transpose(out[1], (0, 2, 1, 3))

            boxes, _ = self.__generate_bounding_box(
                out1[0, :, :, 1].copy(),
                out0[0, :, :, :].copy(),
                scale,
                self._steps_threshold[0],
            )

            # inter-scale nms
            pick = self.__nms(boxes.copy(), 0.5, "Union")
            if boxes.size > 0 and pick.size > 0:
                boxes = boxes[pick, :]
                total_boxes = np.append(total_boxes, boxes, axis=0)

        numboxes = total_boxes.shape[0]

        if numboxes > 0:
            pick = self.__nms(total_boxes.copy(), 0.7, "Union")
            total_boxes = total_boxes[pick, :]

            regw = total_boxes[:, 2] - total_boxes[:, 0]
            regh = total_boxes[:, 3] - total_boxes[:, 1]

            # Refine corners with p_net's regression outputs (columns 5-8).
            qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
            qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
            qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
            qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh

            total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
            total_boxes = self.__rerec(total_boxes.copy())

            total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
            status = StageStatus(
                self.__pad(total_boxes.copy(), stage_status.width, stage_status.height),
                width=stage_status.width,
                height=stage_status.height,
            )

        return total_boxes, status

    def __stage2(self, img, total_boxes, stage_status: StageStatus):
        """
        Second stage of the MTCNN: re-score and refine stage-1 candidates with r_net.
        :param img: input frame
        :param total_boxes: candidate boxes from stage 1
        :param stage_status: StageStatus with padding info from stage 1
        :return: (refined boxes, StageStatus)
        """

        num_boxes = total_boxes.shape[0]
        if num_boxes == 0:
            return total_boxes, stage_status

        # second stage
        tempimg = np.zeros(shape=(24, 24, 3, num_boxes))

        for k in range(0, num_boxes):
            # Crop each (padded) candidate and resize it to r_net's 24x24 input.
            tmp = np.zeros((int(stage_status.tmp_h[k]), int(stage_status.tmp_w[k]), 3))

            tmp[
                stage_status.dy[k] - 1 : stage_status.edy[k],
                stage_status.dx[k] - 1 : stage_status.edx[k],
                :,
            ] = img[
                stage_status.y[k] - 1 : stage_status.ey[k],
                stage_status.x[k] - 1 : stage_status.ex[k],
                :,
            ]

            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                tempimg[:, :, :, k] = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_AREA)

            else:
                return np.empty(shape=(0,)), stage_status

        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))

        out = tflite_inference(self.r_net, tempimg1)

        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])

        score = out1[1, :]

        ipass = np.where(score > self._steps_threshold[1])

        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])

        mv = out0[:, ipass[0]]

        if total_boxes.shape[0] > 0:
            pick = self.__nms(total_boxes, 0.7, "Union")
            total_boxes = total_boxes[pick, :]
            total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
            total_boxes = self.__rerec(total_boxes.copy())

        return total_boxes, stage_status

    def __stage3(self, img, total_boxes, stage_status: StageStatus):
        """
        Third stage of the MTCNN: final o_net pass producing boxes plus 5-point landmarks.

        :param img: input frame
        :param total_boxes: refined boxes from stage 2
        :param stage_status: StageStatus from stage 2
        :return: (final boxes with scores, landmark points of shape (num_faces, 10))
        """
        num_boxes = total_boxes.shape[0]
        if num_boxes == 0:
            return total_boxes, np.empty(shape=(0,))

        total_boxes = np.fix(total_boxes).astype(np.int32)

        status = StageStatus(
            self.__pad(total_boxes.copy(), stage_status.width, stage_status.height),
            width=stage_status.width,
            height=stage_status.height,
        )

        tempimg = np.zeros((48, 48, 3, num_boxes))

        for k in range(0, num_boxes):
            # Crop each (padded) candidate and resize it to o_net's 48x48 input.
            tmp = np.zeros((int(status.tmp_h[k]), int(status.tmp_w[k]), 3))

            tmp[status.dy[k] - 1 : status.edy[k], status.dx[k] - 1 : status.edx[k], :] = img[
                status.y[k] - 1 : status.ey[k], status.x[k] - 1 : status.ex[k], :
            ]

            if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
                tempimg[:, :, :, k] = cv2.resize(tmp, (48, 48), interpolation=cv2.INTER_AREA)
            else:
                return np.empty(shape=(0,)), np.empty(shape=(0,))

        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))

        out = tflite_inference(self.o_net, tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])

        score = out2[1, :]

        points = out1

        ipass = np.where(score > self._steps_threshold[2])

        points = points[:, ipass[0]]

        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])

        mv = out0[:, ipass[0]]

        w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        h = total_boxes[:, 3] - total_boxes[:, 1] + 1

        # Landmarks are predicted relative to the box; map them to image coordinates.
        points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
        points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1

        if total_boxes.shape[0] > 0:
            total_boxes = self.__bbreg(total_boxes.copy(), np.transpose(mv))
            pick = self.__nms(total_boxes.copy(), 0.7, "Min")
            total_boxes = total_boxes[pick, :]
            points = points[:, pick]

        return total_boxes, points.transpose()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/face_recognition.py
DELETED
|
@@ -1,114 +0,0 @@
|
|
| 1 |
-
from .utils import tflite_inference
|
| 2 |
-
from .nametypes import Identity, Match
|
| 3 |
-
from sklearn.metrics.pairwise import cosine_distances
|
| 4 |
-
import numpy as np
|
| 5 |
-
import cv2
|
| 6 |
-
from skimage.transform import SimilarityTransform
|
| 7 |
-
from .utils import get_file
|
| 8 |
-
import tflite_runtime.interpreter as tflite
|
| 9 |
-
from typing import Literal
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
# Release bucket hosting the pre-converted face-embedding tflite models.
BASE_URL = "https://github.com/Martlgap/FaceIDLight/releases/download/v.0.1/"

# SHA-256 checksums keyed by model name; the key doubles as the tflite file stem.
FILE_HASHES = {
    "mobileNet": "6c19b789f661caa8da735566490bfd8895beffb2a1ec97a56b126f0539991aa6",
    "resNet": "f4d8b0194957a3ad766135505fc70a91343660151a8103bbb6c3b8ac34dbb4e2",
}
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
class FaceRecognition:
    """Aligns detected faces, computes embeddings, and matches them against a gallery."""

    def __init__(
        self,
        min_similarity: float = 0.67,
        model_name: Literal["mobileNet", "resNet"] = "mobileNet",
    ):
        """
        :param min_similarity: maximum cosine *distance* for which a detection is
            still matched to a gallery identity (despite the name, smaller = stricter).
        :param model_name: embedding backbone to download; must be a FILE_HASHES key.
            (Fixed: the annotation previously allowed "resNet50", which would raise
            KeyError on FILE_HASHES lookup — the valid key is "resNet".)
        """
        self.min_similarity = min_similarity
        self.model = tflite.Interpreter(model_path=get_file(BASE_URL + f"{model_name}.tflite", FILE_HASHES[model_name]))

    def __call__(self, frame, detections):
        """Align each detected face and compute its embedding.

        :param frame: full image the detections refer to.
        :param detections: list of Detection namedtuples (with .idx and .landmarks).
        :return: list of Identity namedtuples, one per detection ([] if none).
        """
        # Align faces via their five landmarks. (The previous raw bbox crop with a
        # bare `except:` was dead code: the crops were never used afterwards.)
        faces_aligned = [self.align(frame, detection.landmarks) for detection in detections]

        if not faces_aligned:
            return []

        # Normalize images from [0, 255] to [0, 1]
        faces_aligned_norm = np.asarray(faces_aligned).astype(np.float32) / 255.0

        embs_det = tflite_inference(self.model, faces_aligned_norm)
        embs_det = np.asarray(embs_det[0])

        # One identity per detection, keeping the link back to the detection index.
        return [
            Identity(
                detection_idx=detection.idx,
                embedding=embs_det[idx],
                face_aligned=faces_aligned[idx],
            )
            for idx, detection in enumerate(detections)
        ]

    def find_matches(self, identities, gallery):
        """Match detected identities against the gallery via cosine distance.

        :param identities: detected Identity namedtuples (with .embedding).
        :param gallery: gallery Identity namedtuples (with .embedding and .name).
        :return: Match list sorted by gallery index ([] if either input is empty).
        """
        if len(gallery) == 0 or len(identities) == 0:
            return []

        # Get Embeddings
        embs_gal = np.asarray([identity.embedding for identity in gallery])
        embs_det = np.asarray([identity.embedding for identity in identities])

        # Calculate Cosine Distances
        cos_distances = cosine_distances(embs_det, embs_gal)

        # Keep, for each detection, its closest gallery entry if close enough.
        matches = []
        for ident_idx, identity in enumerate(identities):
            dist_to_identity = cos_distances[ident_idx]
            idx_min = np.argmin(dist_to_identity)
            # min_similarity acts as a distance threshold here.
            if dist_to_identity[idx_min] < self.min_similarity:
                matches.append(
                    Match(
                        identity_idx=identity.detection_idx,
                        gallery_idx=idx_min,
                        distance=dist_to_identity[idx_min],
                        name=gallery[idx_min].name,
                    )
                )

        # Sort matches by gallery index (the previous comment wrongly said identity_idx).
        matches = sorted(matches, key=lambda match: match.gallery_idx)

        return matches

    @staticmethod
    def align(img, landmarks_source, target_size=(112, 112)):
        """Warp img so the five source landmarks map onto the canonical 112x112 layout.

        :param img: source image.
        :param landmarks_source: five [x, y] landmark points from the detector.
        :param target_size: output crop size (width, height).
        :return: aligned face crop of target_size.
        """
        # Canonical ArcFace-style landmark positions for a 112x112 crop.
        landmarks_target = np.array(
            [
                [38.2946, 51.6963],
                [73.5318, 51.5014],
                [56.0252, 71.7366],
                [41.5493, 92.3655],
                [70.7299, 92.2041],
            ],
            dtype=np.float32,
        )
        tform = SimilarityTransform()
        tform.estimate(landmarks_source, landmarks_target)
        tmatrix = tform.params[0:2, :]
        face_aligned = cv2.warpAffine(img, tmatrix, target_size, borderValue=0.0)
        return face_aligned
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/gallery.py
DELETED
|
@@ -1,37 +0,0 @@
|
|
| 1 |
-
from .face_detection import FaceDetection
|
| 2 |
-
from .face_recognition import FaceRecognition
|
| 3 |
-
from .nametypes import Identity
|
| 4 |
-
import cv2
|
| 5 |
-
import os
|
| 6 |
-
import numpy as np
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def init_gallery(files, min_detections_conf=0.8, min_similarity=0.67, model_name="mobileNet"):
    """Build a gallery of reference identities from uploaded image files.

    :param files: iterable of file-like objects with .read() and .name; the file
        name without extension becomes the identity name.
    :param min_detections_conf: detection confidence threshold for gallery images.
    :param min_similarity: distance threshold forwarded to FaceRecognition.
    :param model_name: embedding backbone name forwarded to FaceRecognition.
    :return: list of Identity namedtuples (images without a detected face are skipped).
    """
    face_detector = FaceDetection(min_detections_conf=min_detections_conf)
    face_recognizer = FaceRecognition(model_name=model_name, min_similarity=min_similarity)

    gallery = []
    for file in files:
        # Decode the uploaded bytes and convert BGR (OpenCV default) to RGB.
        file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
        img = cv2.cvtColor(cv2.imdecode(file_bytes, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)

        # Face Detection; keep at most the first detected face per image.
        img, detections = face_detector(img)
        if not detections:
            continue
        detections = detections[:1]

        # Face Recognition
        identities = face_recognizer(img, detections)
        if not identities:
            # Defensive guard: previously identities[0] would raise IndexError here.
            continue

        # Add to gallery
        gallery.append(
            Identity(
                name=os.path.splitext(file.name)[0],
                embedding=identities[0].embedding,
                face_aligned=identities[0].face_aligned,
            )
        )

    return gallery
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/nametypes.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
| 1 |
-
from typing import NamedTuple, List
|
| 2 |
-
import numpy as np
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
class Detection(NamedTuple):
    """A single face found by FaceDetection."""

    # Index of the detection within the current frame.
    idx: int = None
    # [[x_min, y_min], [x_max, y_max]] corner coordinates.
    bbox: List[List[float]] = None
    # Five [x, y] landmark points (eyes, nose, mouth corners).
    landmarks: List[List[float]] = None
    # Detector confidence score for this face.
    confidence: float = None
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
class Identity(NamedTuple):
    """An embedding (plus aligned face crop) for a detected or gallery face."""

    # Index of the Detection this identity was computed from (unset for gallery entries).
    detection_idx: int = None
    # Person name (set for gallery entries, taken from the image file name).
    name: str = None
    # Face embedding vector produced by the recognition model.
    embedding: np.ndarray = None
    # Aligned face crop the embedding was computed from.
    face_aligned: np.ndarray = None
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
class Stats(NamedTuple):
    """Per-frame processing statistics."""

    # Frames per second of the processed stream.
    fps: float = 0
    # Frame resolution; presumably [height, width, channels] — confirm against caller.
    # NOTE(review): mutable list default is shared across instances; safe only while
    # it is never mutated in place.
    resolution: List[int] = [None, None, None]
    # Number of faces found in the frame.
    num_faces: int = 0
    # Per-step timing values; None until measured (units defined by the caller).
    detection: float = None
    recognition: float = None
    matching: float = None
    annotation: float = None
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
class Match(NamedTuple):
    """A pairing between a detected identity and its closest gallery entry."""

    # detection_idx of the matched detected identity.
    identity_idx: int = None
    # Index into the gallery list.
    gallery_idx: int = None
    # Cosine distance between the two embeddings (smaller = more similar).
    distance: float = None
    # Name of the matched gallery entry.
    name: str = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/pca.py
DELETED
|
@@ -1,59 +0,0 @@
|
|
| 1 |
-
from sklearn.decomposition import PCA
|
| 2 |
-
import numpy as np
|
| 3 |
-
import plotly.express as px
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
def pca(matches, identities, gallery, dim=3):
    """
    Perform PCA on matched embedding pairs and return a scatter plot.

    Args:
        matches: Match tuples linking detections to gallery entries.
        identities: detected identities (with .embedding), indexed by match.identity_idx.
        gallery: gallery identities (with .embedding and .name).
        dim: number of principal components to plot, either 2 or 3.

    Returns:
        A plotly figure with one point per embedding, colored by identity name.

    Raises:
        ValueError: if dim is neither 2 nor 3 (checked up front, before any fitting).
    """
    if dim not in (2, 3):
        raise ValueError("dim must be either 2 or 3")

    # Get Gallery and Detection Embeddings and stitch them together in pairs,
    # so each matched gallery/detection pair sits next to each other.
    embeddings = np.concatenate(
        [[gallery[match.gallery_idx].embedding, identities[match.identity_idx].embedding] for match in matches],
        axis=0,
    )

    # Duplicate each gallery name so both points of a pair share one color.
    identity_names = np.concatenate(
        [[gallery[match.gallery_idx].name, gallery[match.gallery_idx].name] for match in matches],
        axis=0,
    )

    # Fit and project in one step; `reducer` avoids shadowing this function's name.
    reducer = PCA(n_components=dim)
    embeddings_pca = reducer.fit_transform(embeddings)

    if dim == 3:
        fig = px.scatter_3d(
            embeddings_pca,
            x=0,
            y=1,
            z=2,
            opacity=0.7,
            color=identity_names,
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
        fig.update_traces(marker=dict(size=4))
    else:
        fig = px.scatter(
            embeddings_pca,
            x=0,
            y=1,
            opacity=0.7,
            color=identity_names,
            color_discrete_sequence=px.colors.qualitative.Vivid,
        )
        fig.update_traces(marker=dict(size=4))
        fig.update_xaxes(showgrid=True)
        fig.update_yaxes(showgrid=True)
    fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))

    return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tools/utils.py
DELETED
|
@@ -1,164 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
import os
|
| 3 |
-
import streamlit as st
|
| 4 |
-
from twilio.rest import Client
|
| 5 |
-
import os
|
| 6 |
-
import numpy as np
|
| 7 |
-
import hashlib
|
| 8 |
-
import tempfile
|
| 9 |
-
import os
|
| 10 |
-
import hashlib
|
| 11 |
-
from tqdm import tqdm
|
| 12 |
-
from zipfile import ZipFile
|
| 13 |
-
from urllib.request import urlopen
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
logger = logging.getLogger(__name__)
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
@st.cache_data
def get_ice_servers(name="twilio"):
    """Return ICE servers for WebRTC NAT traversal.

    :param name: backend to query, either "twilio" or "metered".
    :return: list of ICE server dicts; falls back to Google's public STUN server
        when the backend credentials are missing from the environment.
    :raises ValueError: for an unknown backend name.
    """
    fallback = [{"urls": ["stun:stun.l.google.com:19302"]}]

    if name == "twilio":
        # Ref: https://www.twilio.com/docs/stun-turn/api
        try:
            sid = os.environ["TWILIO_ACCOUNT_SID"]
            token = os.environ["TWILIO_AUTH_TOKEN"]
        except KeyError:
            logger.warning("Twilio credentials are not set. Fallback to a free STUN server from Google.")
            return fallback
        return Client(sid, token).tokens.create().ice_servers

    if name == "metered":
        try:
            username = os.environ["METERED_USERNAME"]
            credential = os.environ["METERED_CREDENTIAL"]
        except KeyError:
            logger.warning("Metered credentials are not set. Fallback to a free STUN server from Google.")
            return fallback
        stun_url = "stun:a.relay.metered.ca:80"
        turn_urls = [
            "turn:a.relay.metered.ca:80",
            "turn:a.relay.metered.ca:80?transport=tcp",
            "turn:a.relay.metered.ca:443",
            "turn:a.relay.metered.ca:443?transport=tcp",
        ]
        # One STUN entry plus the four TURN variants, all sharing the same credentials.
        ice_servers = [{"url": stun_url, "urls": stun_url}]
        for url in turn_urls:
            ice_servers.append({"url": url, "username": username, "urls": url, "credential": credential})
        return ice_servers

    raise ValueError(f"Unknown name: {name}")
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
# Function to format floats within a list
def format_dflist(val):
    """Recursively render floats in nested lists/arrays as 2-decimal strings.

    :param val: a scalar, list, or numpy array (possibly nested)
    :return: the same structure with every float-like value replaced by a
        ``"%.2f"``-style string; non-float values are returned unchanged
    """
    if isinstance(val, list):
        return [format_dflist(num) for num in val]
    if isinstance(val, np.ndarray):
        # Recursing over the elements yields strings, so the result is a string array.
        return np.asarray([format_dflist(num) for num in val])
    # np.float64 is already a subclass of float; np.floating additionally covers
    # float16/float32 scalars, merging the two previously duplicated branches.
    if isinstance(val, (float, np.floating)):
        return f"{val:.2f}"
    return val
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
def rgb(r, g, b):
    """Convert 8-bit RGB components into a ``#rrggbb`` hex color string."""
    return f"#{r:02x}{g:02x}{b:02x}"
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
def tflite_inference(model, img):
    """Inferences an image through the model with tflite interpreter on CPU.

    :param model: a tflite.Interpreter loaded with a model
    :param img: image (converted to a float32 numpy array internally)
    :return: list of outputs of the model, one entry per output tensor
    """
    batch = img if isinstance(img, np.ndarray) else np.asarray(img)

    # Ensure a leading batch dimension (3-D input -> 4-D NHWC-style batch of one).
    if batch.ndim == 3:
        batch = batch[np.newaxis, ...]

    input_details = model.get_input_details()
    output_details = model.get_output_details()

    # Resize the interpreter's input to match the actual batch shape, then run.
    model.resize_tensor_input(input_details[0]["index"], batch.shape)
    model.allocate_tensors()
    model.set_tensor(input_details[0]["index"], batch.astype(np.float32))
    model.invoke()
    return [model.get_tensor(out["index"]) for out in output_details]
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
def get_file(origin, file_hash, is_zip=False):
    """Download a file into the temp dir with sha256-based caching.

    The file is stored under ``<tempdir>/FaceIDLight/<basename>``. If a cached
    copy exists and its sha256 digest matches ``file_hash``, no download is
    performed.

    :param origin: URL to download from
    :param file_hash: expected sha256 hex digest of the downloaded file
    :param is_zip: if True, extract the archive next to itself and return the
        extraction directory instead of the archive path
    :return: local path to the file (or extraction directory when ``is_zip``)
    """
    tmp_file = os.path.join(tempfile.gettempdir(), "FaceIDLight", origin.split("/")[-1])
    os.makedirs(os.path.dirname(tmp_file), exist_ok=True)

    # Download when the cached copy is missing or its hash does not match.
    if not os.path.exists(tmp_file):
        download = True
    else:
        hasher = hashlib.sha256()
        with open(tmp_file, "rb") as file:
            for chunk in iter(lambda: file.read(65535), b""):
                hasher.update(chunk)
        if hasher.hexdigest() != file_hash:
            print(
                "A local file was found, but it seems to be incomplete or outdated because the file hash does not "
                "match the original value of " + file_hash + " so data will be downloaded."
            )
            download = True
        else:
            download = False

    if download:
        # Close the HTTP response deterministically (the previous version leaked it,
        # and also called file.close() redundantly inside the with-block).
        with urlopen(origin) as response:
            with tqdm.wrapattr(
                open(tmp_file, "wb"),
                "write",
                miniters=1,
                desc="Downloading " + origin.split("/")[-1] + " to: " + tmp_file,
                total=getattr(response, "length", None),
            ) as file:
                for chunk in response:
                    file.write(chunk)

    if is_zip:
        # splitext strips only the final extension; the old split(".")[0] truncated
        # at the first dot anywhere in the path (e.g. "model.v2.zip" or dotted dirs).
        extract_dir = os.path.splitext(tmp_file)[0]
        with ZipFile(tmp_file, "r") as zipObj:
            zipObj.extractall(extract_dir)
        tmp_file = extract_dir
    return tmp_file
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
def get_hash(filepath):
    """Return the sha256 hex digest of the file at ``filepath``, read in chunks."""
    digest = hashlib.sha256()
    with open(filepath, "rb") as fh:
        # Stream in 64 KiB chunks so large files never load fully into memory.
        while chunk := fh.read(65535):
            digest.update(chunk)
    return digest.hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|