testing on hugging
- app.py +227 -63
- app_bak.py +0 -299
- packages.txt +1 -0
- tools/.DS_Store +0 -0
- tools/alignment.py +0 -39
- tools/annotation.py +0 -121
- tools/detection.py +0 -44
- tools/face_recognition.py +204 -0
- tools/identification.py +0 -47
- tools/nametypes.py +30 -0
- tools/utils.py +165 -59
- tools/webcam.py +0 -38
app.py
CHANGED
@@ -1,79 +1,243 @@
 import streamlit as st
 import time
-from
+from typing import List
+from streamlit_webrtc import webrtc_streamer, WebRtcMode
 import logging
+import mediapipe as mp
+import tflite_runtime.interpreter as tflite
+import av
+import numpy as np
+import queue
+from streamlit_toggle import st_toggle_switch
+import pandas as pd
+from tools.nametypes import Stats, Detection
+from pathlib import Path
+from tools.utils import get_ice_servers, download_file, display_match, rgb
+from tools.face_recognition import (
+    detect_faces,
+    align_faces,
+    inference,
+    draw_detections,
+    recognize_faces,
+    process_gallery,
+)
+
+# TODO Error Handling!


 # Set logging level to error (To avoid getting spammed by queue warnings etc.)
+logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.ERROR)

+ROOT = Path(__file__).parent
+
+MODEL_URL = (
+    "https://github.com/Martlgap/FaceIDLight/releases/download/v.0.1/mobileNet.tflite"
+)
+MODEL_LOCAL_PATH = ROOT / "./models/mobileNet.tflite"
+
+DETECTION_CONFIDENCE = 0.5
+TRACKING_CONFIDENCE = 0.5
+MAX_FACES = 2

 # Set page layout for streamlit to wide
-st.set_page_config(
-""
-webcam = init_webcam()
-# KPI Section
-st.markdown("**Stats**")
-kpi = KPI(["**FrameRate**"])
-st.markdown("---")
-#
-st.markdown("---")
-# DISPLAY THE LIVE STREAM --------------------------------------------------
-stream_display.image(
-    frame, channels="RGB", caption="Live-Stream", use_column_width=True
-)
+st.set_page_config(
+    layout="wide", page_title="FaceID App Demo", page_icon=":sunglasses:"
+)
+with st.sidebar:
+    st.markdown("# Preferences")
+    face_rec_on = st_toggle_switch(
+        "Face Recognition",
+        key="activate_face_rec",
+        default_value=True,
+        active_color=rgb(255, 75, 75),
+        track_color=rgb(50, 50, 50),
+    )
+
+    st.markdown("## Webcam")
+    resolution = st.selectbox(
+        "Webcam Resolution",
+        [(1920, 1080), (1280, 720), (640, 360)],
+        index=2,
+    )
+    st.markdown("## Face Detection")
+    max_faces = st.number_input("Maximum Number of Faces", value=2, min_value=1)
+    detection_confidence = st.slider(
+        "Min Detection Confidence", min_value=0.0, max_value=1.0, value=0.5
+    )
+    tracking_confidence = st.slider(
+        "Min Tracking Confidence", min_value=0.0, max_value=1.0, value=0.9
+    )
+    on_draw = st_toggle_switch(
+        "Show Drawings",
+        key="show_drawings",
+        default_value=True,
+        active_color=rgb(255, 75, 75),
+        track_color=rgb(100, 100, 100),
+    )
+    st.markdown("## Face Recognition")
+    similarity_threshold = st.slider(
+        "Similarity Threshold", min_value=0.0, max_value=2.0, value=0.67
+    )
+
+download_file(
+    MODEL_URL,
+    MODEL_LOCAL_PATH,
+    file_hash="6c19b789f661caa8da735566490bfd8895beffb2a1ec97a56b126f0539991aa6",
+)
+
+# Session-specific caching of the face recognition model
+cache_key = "face_id_model"
+if cache_key in st.session_state:
+    face_recognition_model = st.session_state[cache_key]
+else:
+    face_recognition_model = tflite.Interpreter(model_path=MODEL_LOCAL_PATH.as_posix())
+    st.session_state[cache_key] = face_recognition_model
+
+# Session-specific caching of the face detection model
+cache_key = "face_detection_model"
+if cache_key in st.session_state:
+    face_detection_model = st.session_state[cache_key]
+else:
+    face_detection_model = mp.solutions.face_mesh.FaceMesh(
+        refine_landmarks=True,
+        min_detection_confidence=detection_confidence,
+        min_tracking_confidence=tracking_confidence,
+        max_num_faces=max_faces,
+    )
+    st.session_state[cache_key] = face_detection_model
+
+stats_queue: "queue.Queue[Stats]" = queue.Queue()
+detections_queue: "queue.Queue[List[Detection]]" = queue.Queue()
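+# These queues hand per-frame results from the WebRTC callback thread over to the
+# main Streamlit script thread, which renders them further below.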
+
+
+def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
+    detections = None
+    frame_start = time.time()
+
+    # Convert frame to numpy array
+    frame = frame.to_ndarray(format="rgb24")
+
+    # Get frame resolution
+    resolution = frame.shape
+
+    start = time.time()
+    if face_rec_on:
+        detections = detect_faces(frame, face_detection_model)
+    time_detection = (time.time() - start) * 1000
+
+    start = time.time()
+    if face_rec_on:
+        detections = align_faces(frame, detections)
+    time_normalization = (time.time() - start) * 1000
+
+    start = time.time()
+    if face_rec_on:
+        detections = inference(detections, face_recognition_model)
+    time_inference = (time.time() - start) * 1000
+
+    start = time.time()
+    if face_rec_on:
+        detections = recognize_faces(detections, gallery, similarity_threshold)
+    time_recognition = (time.time() - start) * 1000
+
+    start = time.time()
+    if face_rec_on and on_draw:
+        frame = draw_detections(frame, detections)
+    time_drawing = (time.time() - start) * 1000
+
+    # Convert frame back to av.VideoFrame
+    frame = av.VideoFrame.from_ndarray(frame, format="rgb24")
+
+    # Put detections, stats and timings into queues (to be accessible by other thread)
+    if face_rec_on:
+        detections_queue.put(detections)
+        stats_queue.put(
+            Stats(
+                fps=1 / (time.time() - frame_start),
+                resolution=resolution,
+                num_faces=len(detections) if detections else 0,
+                detection=time_detection,
+                normalization=time_normalization,
+                inference=time_inference,
+                recognition=time_recognition,
+                drawing=time_drawing,
+            )
+        )

+    return frame


+# Streamlit app
+st.title("FaceID App Demonstration")

+st.sidebar.markdown("**Gallery**")
+gallery = st.sidebar.file_uploader(
+    "Upload images to gallery", type=["png", "jpg", "jpeg"], accept_multiple_files=True
+)
+if gallery:
+    gallery = process_gallery(gallery, face_detection_model, face_recognition_model)
+    st.sidebar.markdown("**Gallery Images**")
+    st.sidebar.image(
+        [identity.image for identity in gallery],
+        caption=[identity.name for identity in gallery],
+        width=112,
+    )

+st.markdown("**Stats**")
+stats = st.empty()
+
+ctx = webrtc_streamer(
+    key="FaceIDAppDemo",
+    mode=WebRtcMode.SENDRECV,
+    rtc_configuration={"iceServers": get_ice_servers("twilio")},
+    video_frame_callback=video_frame_callback,
+    media_stream_constraints={
+        "video": {
+            "width": {
+                "min": resolution[0],
+                "ideal": resolution[0],
+                "max": resolution[0],
+            }
+        },
+        "audio": False,
+    },
+    async_processing=False,  # WHAT IS THIS?
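+    # (Assumption: with async_processing=False, webrtc_streamer runs
+    # video_frame_callback synchronously on each frame; the default True
+    # processes frames in a separate thread and may drop frames when
+    # processing is slow.)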
+)
+
+st.markdown("**Timings [ms]**")
+timings = st.empty()
+
+st.markdown("**Identified Faces**")
+identified_faces = st.empty()
+
+st.markdown("**Detections**")
+detections = st.empty()
+
+# Display Live Stats
+if ctx.state.playing:
+    while True:
+        stats_dataframe = pd.DataFrame([stats_queue.get()])
+        stats.dataframe(stats_dataframe.style.format(thousands=" ", precision=2))

+        detections_data = detections_queue.get()
+        detections_dataframe = pd.DataFrame(detections_data).drop(
+            columns=["face", "face_match"], errors="ignore"
+        )
+        # Apply formatting to DataFrame
+        # print(detections_dataframe.columns)
+        # detections_dataframe["embedding"] = detections_dataframe["embedding"].embedding.applymap(format_floats)
+
+        detections.dataframe(detections_dataframe)
+
+        identified_faces.image(
+            [display_match(d) for d in detections_data if d.name is not None],
+            caption=[
+                d.name + f"({d.distance:.2f})"
+                for d in detections_data
+                if d.name is not None
+            ],
+            width=112,
+        )  # TODO formatting
+
+        # time.sleep(1)
app_bak.py
DELETED
@@ -1,299 +0,0 @@
-import streamlit as st
-import streamlit_toggle as tog
-import time
-import numpy as np
-import cv2
-from tools.annotation import draw_mesh, draw_landmarks, draw_bounding_box, draw_text
-from tools.alignment import align_faces
-from tools.identification import load_identification_model, inference, identify
-from tools.utils import show_images, show_faces, rgb
-from tools.detection import load_detection_model, detect_faces
-from tools.webcam import init_webcam
-import logging
-
-
-# Set logging level to error (To avoid getting spammed by queue warnings etc.)
-logging.basicConfig(level=logging.ERROR)
-
-
-# Set page layout for streamlit to wide
-st.set_page_config(layout="wide")
-
-
-# Initialize the Face Detection and Identification Models
-detection_model = load_detection_model(max_faces=2, detection_confidence=0.5, tracking_confidence=0.9)
-identification_model = load_identification_model(name="MobileNet")
-
-
-# Gallery Processing
-@st.cache_data
-def gallery_processing(gallery_files):
-    """Process the gallery images (Complete Face Recognition Pipeline)
-
-    Args:
-        gallery_files (_type_): Files uploaded by the user
-
-    Returns:
-        _type_: Gallery Images, Gallery Embeddings, Gallery Names
-    """
-    gallery_images, gallery_embs, gallery_names = [], [], []
-    if gallery_files is not None:
-        for file in gallery_files:
-            file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
-            img = cv2.cvtColor(
-                cv2.imdecode(file_bytes, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB
-            )
-            gallery_names.append(
-                file.name.split(".jpg")[0].split(".png")[0].split(".jpeg")[0]
-            )
-            detections = detect_faces(img, detection_model)
-            aligned_faces = align_faces(img, np.asarray([detections[0]]))
-            gallery_images.append(aligned_faces[0])
-            gallery_embs.append(inference(aligned_faces, identification_model)[0])
-    return gallery_images, gallery_embs, gallery_names
-
-
-class SideBar:
-    """A class to handle the sidebar
-    """
-    def __init__(self):
-        with st.sidebar:
-            st.markdown("# Preferences")
-            self.on_face_recognition = tog.st_toggle_switch(
-                "Face Recognition", key="activate_face_rec", default_value=True, active_color=rgb(255, 75, 75), track_color=rgb(50, 50, 50)
-            )
-
-            st.markdown("---")
-
-            st.markdown("## Webcam")
-            self.resolution = st.selectbox(
-                "Webcam Resolution",
-                [(1920, 1080), (1280, 720), (640, 360)],
-                index=2,
-            )
-            st.markdown("To change webcam resolution: Please refresh page and select resolution before starting webcam stream.")
-
-            st.markdown("---")
-            st.markdown("## Face Detection")
-            self.max_faces = st.number_input(
-                "Maximum Number of Faces", value=2, min_value=1
-            )
-            self.detection_confidence = st.slider(
-                "Min Detection Confidence", min_value=0.0, max_value=1.0, value=0.5
-            )
-            self.tracking_confidence = st.slider(
-                "Min Tracking Confidence", min_value=0.0, max_value=1.0, value=0.9
-            )
-            switch1, switch2 = st.columns(2)
-            with switch1:
-                self.on_bounding_box = tog.st_toggle_switch(
-                    "Show Bounding Box", key="show_bounding_box", default_value=True, active_color=rgb(255, 75, 75), track_color=rgb(50, 50, 50)
-                )
-            with switch2:
-                self.on_five_landmarks = tog.st_toggle_switch(
-                    "Show Five Landmarks", key="show_five_landmarks", default_value=True, active_color=rgb(255, 75, 75),
-                    track_color=rgb(50, 50, 50)
-                )
-            switch3, switch4 = st.columns(2)
-            with switch3:
-                self.on_mesh = tog.st_toggle_switch(
-                    "Show Mesh", key="show_mesh", default_value=True, active_color=rgb(255, 75, 75),
-                    track_color=rgb(50, 50, 50)
-                )
-            with switch4:
-                self.on_text = tog.st_toggle_switch(
-                    "Show Text", key="show_text", default_value=True, active_color=rgb(255, 75, 75),
-                    track_color=rgb(50, 50, 50)
-                )
-            st.markdown("---")
-
-            st.markdown("## Face Recognition")
-            self.similarity_threshold = st.slider(
-                "Similarity Threshold", min_value=0.0, max_value=2.0, value=0.67
-            )
-
-            self.on_show_faces = tog.st_toggle_switch(
-                "Show Recognized Faces", key="show_recognized_faces", default_value=True, active_color=rgb(255, 75, 75), track_color=rgb(50, 50, 50)
-            )
-
-            self.model_name = st.selectbox(
-                "Model",
-                ["MobileNet", "ResNet"],
-                index=0,
-            )
-            st.markdown("---")
-
-            st.markdown("## Gallery")
-            self.uploaded_files = st.file_uploader(
-                "Choose multiple images to upload", accept_multiple_files=True
-            )
-
-            self.gallery_images, self.gallery_embs, self.gallery_names = gallery_processing(self.uploaded_files)
-
-            st.markdown("**Gallery Faces**")
-            show_images(self.gallery_images, self.gallery_names, 3)
-            st.markdown("---")
-
-
-class KPI:
-    """Class for displaying KPIs in a row
-    Args:
-        keys (list): List of KPI names
-    """
-    def __init__(self, keys):
-        self.kpi_texts = []
-        row = st.columns(len(keys))
-        for kpi, key in zip(row, keys):
-            with kpi:
-                item_row = st.columns(2)
-                item_row[0].markdown(f"**{key}**:")
-                self.kpi_texts.append(item_row[1].markdown("-"))
-
-    def update_kpi(self, kpi_values):
-        for kpi_text, kpi_value in zip(self.kpi_texts, kpi_values):
-            kpi_text.write(
-                f"<h5 style='text-align: center; color: red;'>{kpi_value:.2f}</h5>"
-                if isinstance(kpi_value, float)
-                else f"<h5 style='text-align: center; color: red;'>{kpi_value}</h5>",
-                unsafe_allow_html=True,
-            )
-
-# -----------------------------------------------------------------------------------------------
-# Streamlit App
-st.title("FaceID App Demonstration")
-
-# Sidebar
-sb = SideBar()
-
-# Get Access to Webcam
-webcam = init_webcam(width=sb.resolution[0])
-
-# KPI Section
-st.markdown("**Stats**")
-kpi = KPI([
-    "**FrameRate**",
-    "**Detected Faces**",
-    "**Image Dims**",
-    "**Detection [ms]**",
-    "**Normalization [ms]**",
-    "**Inference [ms]**",
-    "**Recognition [ms]**",
-    "**Annotations [ms]**",
-    "**Show Faces [ms]**",
-])
-st.markdown("---")
-
-# Live Stream Display
-stream_display = st.empty()
-st.markdown("---")
-
-# Display Detected Faces
-st.markdown("**Detected Faces**")
-face_window = st.empty()
-st.markdown("---")
-
-
-if webcam:
-    prevTime = 0
-    while True:
-        # Init times to "-" to show something if face recognition is turned off
-        time_detection = "-"
-        time_alignment = "-"
-        time_inference = "-"
-        time_identification = "-"
-        time_annotations = "-"
-        time_show_faces = "-"
-
-        try:
-            # Get Frame from Webcam
-            frame = webcam.get_frame(timeout=1)
-
-            # Convert to OpenCV Image
-            frame = frame.to_ndarray(format="rgb24")
-        except:
-            continue
-
-        # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-        # FACE RECOGNITION PIPELINE
-        if sb.on_face_recognition:
-            # FACE DETECTION ---------------------------------------------------------
-            start_time = time.time()
-            detections = detect_faces(frame, detection_model)
-            time_detection = (time.time() - start_time) * 1000
-
-            # FACE ALIGNMENT ---------------------------------------------------------
-            start_time = time.time()
-            aligned_faces = align_faces(frame, detections)
-            time_alignment = (time.time() - start_time) * 1000
-
-            # INFERENCE --------------------------------------------------------------
-            start_time = time.time()
-            if len(sb.gallery_embs) > 0:
-                faces_embs = inference(aligned_faces, identification_model)
-            else:
-                faces_embs = []
-            time_inference = (time.time() - start_time) * 1000
-
-            # FACE IDENTIFCATION -----------------------------------------------------
-            start_time = time.time()
-            if len(faces_embs) > 0 and len(sb.gallery_embs) > 0:
-                ident_names, ident_dists, ident_imgs = identify(faces_embs, sb.gallery_embs, sb.gallery_names, sb.gallery_images, thresh=sb.similarity_threshold)
-            else:
-                ident_names, ident_dists, ident_imgs = [], [], []
-            time_identification = (time.time() - start_time) * 1000
-
-            # ANNOTATIONS ------------------------------------------------------------
-            start_time = time.time()
-            frame = cv2.resize(frame, (1920, 1080))  # to make annotation in HD
-            frame.flags.writeable = True  # (hack to make annotations faster)
-            if sb.on_mesh:
-                frame = draw_mesh(frame, detections)
-            if sb.on_five_landmarks:
-                frame = draw_landmarks(frame, detections)
-            if sb.on_bounding_box:
-                frame = draw_bounding_box(frame, detections, ident_names)
-            if sb.on_text:
-                frame = draw_text(frame, detections, ident_names)
-            time_annotations = (time.time() - start_time) * 1000
-
-            # DISPLAY DETECTED FACES -------------------------------------------------
-            start_time = time.time()
-            if sb.on_show_faces:
-                show_faces(
-                    aligned_faces,
-                    ident_names,
-                    ident_dists,
-                    ident_imgs,
-                    num_cols=3,
-                    channels="RGB",
-                    display=face_window,
-                )
-            time_show_faces = (time.time() - start_time) * 1000
-        # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-
-
-        # DISPLAY THE LIVE STREAM --------------------------------------------------
-        stream_display.image(
-            frame, channels="RGB", caption="Live-Stream", use_column_width=True
-        )
-
-        # CALCULATE FPS -----------------------------------------------------------
-        currTime = time.time()
-        fps = 1 / (currTime - prevTime)
-        prevTime = currTime
-
-        # UPDATE KPIS -------------------------------------------------------------
-        kpi.update_kpi(
-            [
-                fps,
-                len(detections),
-                sb.resolution,
-                time_detection,
-                time_alignment,
-                time_inference,
-                time_identification,
-                time_annotations,
-                time_show_faces,
-            ]
-        )
packages.txt
ADDED
@@ -0,0 +1 @@
+libgl1-mesa-glx
tools/.DS_Store
DELETED
Binary file (6.15 kB)
tools/alignment.py
DELETED
@@ -1,39 +0,0 @@
-import numpy as np
-import cv2
-from skimage.transform import SimilarityTransform
-
-
-FIVE_LANDMARKS = [470, 475, 1, 57, 287]
-
-
-def align(img, landmarks, target_size=(112, 112)):
-    dst = np.array(
-        [
-            [
-                landmarks.landmark[i].x * img.shape[1],
-                landmarks.landmark[i].y * img.shape[0],
-            ]
-            for i in FIVE_LANDMARKS
-        ],
-    )
-
-    src = np.array(
-        [
-            [38.2946, 51.6963],
-            [73.5318, 51.5014],
-            [56.0252, 71.7366],
-            [41.5493, 92.3655],
-            [70.7299, 92.2041],
-        ],
-        dtype=np.float32,
-    )
-    tform = SimilarityTransform()
-    tform.estimate(dst, src)
-    tmatrix = tform.params[0:2, :]
-    return cv2.warpAffine(img, tmatrix, target_size, borderValue=0.0)
-
-
-
-def align_faces(img, detections):
-    aligned_faces = [align(img, detection.multi_face_landmarks) for detection in detections]
-    return aligned_faces
tools/annotation.py
DELETED
@@ -1,121 +0,0 @@
-import cv2
-import mediapipe as mp
-import streamlit as st
-
-
-FIVE_LANDMARKS = [470, 475, 1, 57, 287]
-FACE_CONNECTIONS = mp.solutions.face_mesh_connections.FACEMESH_TESSELATION
-
-
-
-def draw_bounding_box(img, detections, ident_names, margin=10):
-    # Draw the bounding box on the original frame
-    for detection, name in zip(detections, ident_names):
-
-        color = (255, 0, 0) if name == "Unknown" else (0, 255, 0)
-
-        x_coords = [
-            landmark.x * img.shape[1] for landmark in detection.multi_face_landmarks.landmark
-        ]
-        y_coords = [
-            landmark.y * img.shape[0] for landmark in detection.multi_face_landmarks.landmark
-        ]
-
-        x_min, x_max = int(min(x_coords) - margin), int(max(x_coords) + margin)
-        y_min, y_max = int(min(y_coords) - margin), int(max(y_coords) + margin)
-
-        cv2.rectangle(img, (x_min, y_min), (x_max, y_max), color, 2)
-        cv2.rectangle(img, (x_min, y_min - img.shape[0] // 25), (x_max, y_min), color, -1)
-
-    return img
-
-
-def draw_text(
-    img,
-    detections,
-    ident_names,
-    margin=10,
-    font_scale=1,
-    font_color=(0, 0, 0),
-    font=cv2.FONT_HERSHEY_SIMPLEX,
-):
-
-    font_scale = img.shape[0] / 1000
-    for detection, name in zip(detections, ident_names):
-        x_coords = [
-            landmark.x * img.shape[1] for landmark in detection.multi_face_landmarks.landmark
-        ]
-        y_coords = [
-            landmark.y * img.shape[0] for landmark in detection.multi_face_landmarks.landmark
-        ]
-
-        x_min = int(min(x_coords) - margin)
-        y_min = int(min(y_coords) - margin)
-
-        cv2.putText(
-            img,
-            name,
-            (x_min + img.shape[0] // 400, y_min - img.shape[0] // 100),
-            font,
-            font_scale,
-            font_color,
-            2,
-        )
-
-    return img
-
-
-def draw_mesh(img, detections):
-    for detection in detections:
-        # Draw the connections
-        for connection in FACE_CONNECTIONS:
-            cv2.line(
-                img,
-                (
-                    int(detection.multi_face_landmarks.landmark[connection[0]].x * img.shape[1]),
-                    int(detection.multi_face_landmarks.landmark[connection[0]].y * img.shape[0]),
-                ),
-                (
-                    int(detection.multi_face_landmarks.landmark[connection[1]].x * img.shape[1]),
-                    int(detection.multi_face_landmarks.landmark[connection[1]].y * img.shape[0]),
-                ),
-                (255, 255, 255),
-                1,
-            )
-
-        # Draw the landmarks
-        for points in detection.multi_face_landmarks.landmark:
-            cv2.circle(
-                img,
-                (
-                    int(points.x * img.shape[1]),
-                    int(points.y * img.shape[0]),
-                ),
-                1,
-                (0, 255, 0),
-                -1,
-            )
-    return img
-
-
-def draw_landmarks(img, detections):
-    # Draw the face landmarks on the original frame
-    for points in FIVE_LANDMARKS:
-        for detection in detections:
-            cv2.circle(
-                img,
-                (
-                    int(
-                        detection.multi_face_landmarks.landmark[points].x
-                        * img.shape[1]
-                    ),
-                    int(
-                        detection.multi_face_landmarks.landmark[points].y
-                        * img.shape[0]
-                    ),
-                ),
-                5,
-                (0, 0, 255),
-                -1,
-            )
-    return img
tools/detection.py
DELETED
@@ -1,44 +0,0 @@
-import mediapipe as mp
-import streamlit as st
-
-
-class Detection:
-    multi_face_bboxes = []
-    multi_face_landmarks = []
-
-
-#@st.cache_resource
-def load_detection_model(max_faces=2, detection_confidence=0.5, tracking_confidence=0.5):
-    model = mp.solutions.face_mesh.FaceMesh(
-        refine_landmarks=True,
-        min_detection_confidence=detection_confidence,
-        min_tracking_confidence=tracking_confidence,
-        max_num_faces=max_faces,
-    )
-    return model
-
-
-def detect_faces(frame, model):
-
-    # Process the frame with MediaPipe Face Mesh
-    results = model.process(frame)
-
-    # Get the Bounding Boxes from the detected faces
-    detections = []
-    if results.multi_face_landmarks:
-        for landmarks in results.multi_face_landmarks:
-            x_coords = [
-                landmark.x * frame.shape[1] for landmark in landmarks.landmark
-            ]
-            y_coords = [
-                landmark.y * frame.shape[0] for landmark in landmarks.landmark
-            ]
-
-            x_min, x_max = int(min(x_coords)), int(max(x_coords))
-            y_min, y_max = int(min(y_coords)), int(max(y_coords))
-
-            detection = Detection()
-            detection.multi_face_bboxes = [x_min, y_min, x_max, y_max]
-            detection.multi_face_landmarks = landmarks
-            detections.append(detection)
-    return detections
tools/face_recognition.py
ADDED
@@ -0,0 +1,204 @@
+from .nametypes import Detection, Identity
+import numpy as np
+import cv2
+from sklearn.metrics.pairwise import cosine_distances
+from skimage.transform import SimilarityTransform
+
+
+def detect_faces(frame, model):
+    # Process the frame with MediaPipe Face Mesh
+    results = model.process(frame)
+
+    # Get the Bounding Boxes from the detected faces
+    detections = []
+    if results.multi_face_landmarks:
+        for face in results.multi_face_landmarks:
+            xs = [landmark.x for landmark in face.landmark]
+            ys = [landmark.y for landmark in face.landmark]
+            bbox = [min(xs), min(ys), max(xs), max(ys)]
+
+            FIVE_LANDMARKS = [470, 475, 1, 57, 287]
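+            # These FaceMesh indices correspond roughly to the two iris
+            # centers, the nose tip, and the mouth corners: the five points
+            # used for alignment.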
+
+            landmarks = [
+                [face.landmark[i].x, face.landmark[i].y] for i in FIVE_LANDMARKS
+            ]
+
+            detections.append(Detection(bbox=bbox, landmarks=landmarks))
+    return detections
+
+
+def align(img, landmarks, target_size=(112, 112)):
+    # Transform to Landmark-Coordinates from relative landmark positions
+    dst = np.asarray(landmarks) * img.shape[:2][::-1]
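+    # img.shape[:2][::-1] is (width, height), so the normalized x/y landmarks
+    # become pixel coordinates.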
+
+    # Target Landmarks-Coordinates from ArcFace Paper
+    src = np.array(
+        [
+            [38.2946, 51.6963],
+            [73.5318, 51.5014],
+            [56.0252, 71.7366],
+            [41.5493, 92.3655],
+            [70.7299, 92.2041],
+        ],
+        dtype=np.float32,
+    )
+
+    # Estimate the transformation matrix
+    tform = SimilarityTransform()
+    tform.estimate(dst, src)
+    tmatrix = tform.params[0:2, :]
+
+    # Apply the transformation matrix
+    img = cv2.warpAffine(img, tmatrix, target_size, borderValue=0.0)
+
+    return img
+
+
+def align_faces(img, detections):
+    updated_detections = []
+    for detection in detections:
+        updated_detections.append(
+            detection._replace(face=align(img, detection.landmarks))
+        )
+    return updated_detections
+
+# TODO Error when uploading image while running!
+def inference(detections, model):
+    updated_detections = []
+    faces = [detection.face for detection in detections if detection.face is not None]
+
+    if len(faces) > 0:
+        faces = np.asarray(faces).astype(np.float32) / 255
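+        # Resize the interpreter's input tensor to the current batch of faces,
+        # then reallocate and run a single batched forward pass.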
+        model.resize_tensor_input(model.get_input_details()[0]["index"], faces.shape)
+        model.allocate_tensors()
+        model.set_tensor(model.get_input_details()[0]["index"], faces)
+        model.invoke()
+        embs = [model.get_tensor(elem["index"]) for elem in model.get_output_details()][
+            0
+        ]
+
+        for idx, detection in enumerate(detections):
+            updated_detections.append(detection._replace(embedding=embs[idx]))
+    return updated_detections
+
+
+def recognize_faces(detections, gallery, thresh=0.67):
+
+    if len(gallery) == 0 or len(detections) == 0:
+        return detections
+
+    gallery_embs = np.asarray([identity.embedding for identity in gallery])
+    detection_embs = np.asarray([detection.embedding for detection in detections])
+
+    cos_distances = cosine_distances(detection_embs, gallery_embs)
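+    # cos_distances[i][j] is the cosine distance between detection i and
+    # gallery identity j; each detection is matched to its nearest gallery
+    # entry below.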
+
+    updated_detections = []
+    for idx, detection in enumerate(detections):
+        idx_min = np.argmin(cos_distances[idx])
+        if thresh and cos_distances[idx][idx_min] > thresh:
+            dist = cos_distances[idx][idx_min]
+            pred = None
+        else:
+            dist = cos_distances[idx][idx_min]
+            pred = idx_min
+        updated_detections.append(
+            detection._replace(
+                name=gallery[pred].name.split(".jpg")[0].split(".png")[0].split(".jpeg")[0] if pred is not None else None,
+                embedding_match=gallery[pred].embedding if pred is not None else None,
+                face_match=gallery[pred].image if pred is not None else None,
+                distance=dist,
+            )
+        )
+
+    return updated_detections
+
+
+def process_gallery(files, face_detection_model, face_recognition_model):
+    gallery = []
+    for file in files:
+        file_bytes = np.asarray(bytearray(file.read()), dtype=np.uint8)
+        img = cv2.cvtColor(
+            cv2.imdecode(file_bytes, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB
+        )
+
+        detections = detect_faces(img, face_detection_model)
+
+        # We accept only one face per image!
+        if detections == []:
+            continue
+        elif len(detections) > 1:
+            detections = detections[:1]
+
+        detections = align_faces(img, detections)
+        detections = inference(detections, face_recognition_model)
+
+        gallery.append(
+            Identity(
+                name=file.name,
+                embedding=detections[0].embedding,
+                image=detections[0].face,
+            )
+        )
+
+    return gallery
+
+
+def draw_detections(
+    frame, detections, bbox=True, landmarks=True, name=True, upscale=True
+):
+    if upscale:
+        frame = cv2.resize(
+            frame, (1920, 1080)
+        )  # Upscale frame for better visualization
+
+    shape = np.asarray(frame.shape[:2][::-1])
+
+    for detection in detections:
+        # Draw Landmarks
+        if landmarks:
+            for landmark in detection.landmarks:
+                cv2.circle(
+                    frame,
+                    (np.asarray(landmark) * shape).astype(int),
+                    5,
+                    (0, 0, 255),
+                    -1,
+                )
+
+        # Draw Bounding Box
+        if bbox:
+            cv2.rectangle(
+                frame,
+                (np.asarray(detection.bbox[:2]) * shape).astype(int),
+                (np.asarray(detection.bbox[2:]) * shape).astype(int),
+                (0, 255, 0),
+                2,
+            )
+
+        # Draw Name
+        if name:
+            cv2.rectangle(
+                frame,
+                (
+                    int(detection.bbox[0] * shape[0]),
+                    int(detection.bbox[1] * shape[1] - (shape[1] // 25)),
+                ),
+                (int(detection.bbox[2] * shape[0]), int(detection.bbox[1] * shape[1])),
+                (255, 255, 255),
+                -1,
+            )
+
+            cv2.putText(
+                frame,
+                detection.name,
+                (
+                    int(detection.bbox[0] * shape[0] + shape[0] // 400),
+                    int(detection.bbox[1] * shape[1] - shape[1] // 100),
+                ),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                1,
+                (0, 0, 0),
+                2,
+            )
+
+    return frame
tools/identification.py
DELETED
@@ -1,47 +0,0 @@
-import numpy as np
-import tflite_runtime.interpreter as tflite
-from sklearn.metrics.pairwise import cosine_distances
-import streamlit as st
-import time
-
-
-MODEL_PATHS = {
-    "MobileNet": "./models/mobileNet.tflite",
-    "ResNet": "./models/resNet.tflite",
-}
-
-
-#@st.cache_resource
-def load_identification_model(name="MobileNet"):
-    model = tflite.Interpreter(model_path=MODEL_PATHS[name])
-    return model
-
-
-def inference(imgs, model):
-    if len(imgs) > 0:
-        imgs = np.asarray(imgs).astype(np.float32) / 255
-        model.resize_tensor_input(model.get_input_details()[0]["index"], imgs.shape)
-        model.allocate_tensors()
-        model.set_tensor(model.get_input_details()[0]["index"], imgs)
-        model.invoke()
-        embs = [model.get_tensor(elem["index"]) for elem in model.get_output_details()]
-        return embs[0]
-    else:
-        return []
-
-
-def identify(embs_src, embs_gal, labels_gal, imgs_gal, thresh=None):
-    all_dists = cosine_distances(embs_src, embs_gal)
-    ident_names, ident_dists, ident_imgs = [], [], []
-    for dists in all_dists:
-        idx_min = np.argmin(dists)
-        if thresh and dists[idx_min] > thresh:
-            dist = dists[idx_min]
-            pred = None
-        else:
-            dist = dists[idx_min]
-            pred = idx_min
-        ident_names.append(labels_gal[pred] if pred is not None else "Unknown")
-        ident_dists.append(dist)
-        ident_imgs.append(imgs_gal[pred] if pred is not None else None)
-    return ident_names, ident_dists, ident_imgs
tools/nametypes.py
ADDED
@@ -0,0 +1,30 @@
+from typing import NamedTuple, List
+import numpy as np
+
+
+class Detection(NamedTuple):
+    bbox: List[int]
+    landmarks: List[List[int]]
+    name: str = None
+    face: np.ndarray = None
+    embedding: np.ndarray = None
+    embedding_match: np.ndarray = None
+    face_match: np.ndarray = None
+    distance: float = None
+
+
+class Stats(NamedTuple):
+    fps: float
+    resolution: List[int]
+    num_faces: int
+    detection: float
+    normalization: float
+    inference: float
+    recognition: float
+    drawing: float
+
+
+class Identity(NamedTuple):
+    name: str
+    embedding: np.ndarray
+    image: np.ndarray
tools/utils.py
CHANGED
@@ -1,66 +1,172 @@
+import logging
+import os
+import urllib.request
+from pathlib import Path
 import streamlit as st
+from twilio.rest import Client
 import cv2
+import numpy as np
+import hashlib
-
-
-def
-            idx = row * num_cols + i
-
-            if idx < num_images:
-                img = images[idx]
-                if len(names) == 0:
-                    names = ["Unknown"] * len(images)
-                name = names[idx]
-                col.image(img, caption=name, channels=channels, width=112)
-
-
-def show_faces(images, names, distances, gal_images, num_cols, channels="RGB", display=st):
-    if len(images) == 0 or len(names) == 0:
-        display.write("No faces detected, or gallery empty!")
-        return
-    # Calculate the number of rows and columns
-    num_rows = -(
-        -len(images) // num_cols
-    )  # This also handles the case when num_images is not a multiple of num_cols
-
-    for row in range(num_rows):
-        # Create the columns
-        cols = display.columns(num_cols)
-
-        for i, col in enumerate(cols):
-            idx = row * num_cols + i
-
-            if idx < len(images):
-                img = images[idx]
-                name = names[idx]
-                dist = distances[idx]
-                col.image(img, channels=channels, width=112)
-
-                if gal_images[idx] is not None:
-                    col.text(" ⬍ matching ⬍")
-                    col.image(gal_images[idx], caption=name, channels=channels, width=112)
-                else:
-                    col.markdown("")
-                    col.write("No match found")
-                col.markdown(
-                    f"**Distance: {dist:.4f}**" if dist else f"**Distance: -**"
-                )
-            else:
+
+logger = logging.getLogger(__name__)
+
+
+@st.cache_data
+def get_ice_servers(name="twilio"):
+    """Get ICE servers from Twilio.
+    Returns:
+        List of ICE servers.
+    """
+    if name == "twilio":
+        # Ref: https://www.twilio.com/docs/stun-turn/api
+        try:
+            account_sid = os.environ["TWILIO_ACCOUNT_SID"]
+            auth_token = os.environ["TWILIO_AUTH_TOKEN"]
+        except KeyError:
+            logger.warning(
+                "Twilio credentials are not set. Fallback to a free STUN server from Google."
+            )
+            return [{"urls": ["stun:stun.l.google.com:19302"]}]
+
+        client = Client(account_sid, auth_token)
+
+        token = client.tokens.create()
+
+        return token.ice_servers
+
+    elif name == "metered":
+        try:
+            username = os.environ["METERED_USERNAME"]
+            credential = os.environ["METERED_CREDENTIAL"]
+        except KeyError:
+            logger.warning(
+                "Metered credentials are not set. Fallback to a free STUN server from Google."
+            )
+            return [{"urls": ["stun:stun.l.google.com:19302"]}]
+
+        ice_servers = [
+            {"url": "stun:a.relay.metered.ca:80", "urls": "stun:a.relay.metered.ca:80"},
+            {
+                "url": "turn:a.relay.metered.ca:80",
+                "username": username,
+                "urls": "turn:a.relay.metered.ca:80",
+                "credential": credential,
+            },
+            {
+                "url": "turn:a.relay.metered.ca:80?transport=tcp",
+                "username": username,
+                "urls": "turn:a.relay.metered.ca:80?transport=tcp",
+                "credential": credential,
+            },
+            {
+                "url": "turn:a.relay.metered.ca:443",
+                "username": username,
+                "urls": "turn:a.relay.metered.ca:443",
+                "credential": credential,
+            },
+            {
+                "url": "turn:a.relay.metered.ca:443?transport=tcp",
+                "username": username,
+                "urls": "turn:a.relay.metered.ca:443?transport=tcp",
+                "credential": credential,
+            },
+        ]
+        return ice_servers
+    else:
+        raise ValueError(f"Unknown name: {name}")
+
+
+def get_hash(filepath):
+    hasher = hashlib.sha256()
+    with open(filepath, "rb") as file:
+        for chunk in iter(lambda: file.read(65535), b""):
+            hasher.update(chunk)
+    return hasher.hexdigest()
+
+
+def download_file(url, model_path: Path, file_hash=None):
+    if model_path.exists():
+        if file_hash:
+            hasher = hashlib.sha256()
+            with open(model_path, "rb") as file:
+                for chunk in iter(lambda: file.read(65535), b""):
+                    hasher.update(chunk)
+            if not hasher.hexdigest() == file_hash:
+                print(
+                    "A local file was found, but it seems to be incomplete or outdated because the file hash does not "
+                    "match the original value of "
+                    + file_hash
+                    + " so data will be downloaded."
+                )
+                download = True
+            else:
+                print("Using a verified local file.")
+                download = False
+    else:
+        # Create the directory that will hold the model file
+        model_path.parent.mkdir(parents=True, exist_ok=True)
+        print("Downloading data ...")
+        download = True
+
+    if download:
+
+        # These are handles to two visual elements to animate.
+        weights_warning, progress_bar = None, None
+        try:
+            weights_warning = st.warning("Downloading %s..." % url)
+            progress_bar = st.progress(0)
+            with open(model_path, "wb") as output_file:
+                with urllib.request.urlopen(url) as response:
+                    length = int(response.info()["Content-Length"])
+                    counter = 0.0
+                    MEGABYTES = 2.0**20.0
+                    while True:
+                        data = response.read(8192)
+                        if not data:
+                            break
+                        counter += len(data)
+                        output_file.write(data)
+
+                        # We perform animation by overwriting the elements.
+                        weights_warning.warning(
+                            "Downloading %s... (%6.2f/%6.2f MB)"
+                            % (url, counter / MEGABYTES, length / MEGABYTES)
+                        )
+                        progress_bar.progress(min(counter / length, 1.0))
+
+        # Finally, we remove these visual elements by calling .empty().
+        finally:
+            if weights_warning is not None:
+                weights_warning.empty()
+            if progress_bar is not None:
+                progress_bar.empty()
+
+
+# Function to format floats within a list
+def format_floats(val):
+    if isinstance(val, list):
+        return [f"{num:.2f}" for num in val]
+    if isinstance(val, np.ndarray):
+        return np.asarray([f"{num:.2f}" for num in val])
+    else:
+        return val
+
+
+def display_match(d):
+    im = np.concatenate([d.face, d.face_match])
+    border_size = 2
+    border = cv2.copyMakeBorder(
+        im,
+        top=border_size,
+        bottom=border_size,
+        left=border_size,
+        right=border_size,
+        borderType=cv2.BORDER_CONSTANT,
+        value=(255, 255, 120),
+    )
+    return border
+
+
+def rgb(r, g, b):
+    return '#{:02x}{:02x}{:02x}'.format(r, g, b)
tools/webcam.py
DELETED
@@ -1,38 +0,0 @@
-import streamlit as st
-from streamlit_webrtc import webrtc_streamer, WebRtcMode
-import os
-from twilio.rest import Client
-
-
-account_sid = os.environ['TWILIO_ACCOUNT_SID']
-auth_token = os.environ['TWILIO_AUTH_TOKEN']
-client = Client(account_sid, auth_token)
-
-token = client.tokens.create()
-
-
-RTC_CONFIGURATION = {
-    "iceServers": token.ice_servers
-}
-
-def init_webcam(width=680):
-    ctx = webrtc_streamer(
-        key="FaceIDAppDemo",
-        mode=WebRtcMode.SENDONLY,
-        rtc_configuration=RTC_CONFIGURATION,
-        media_stream_constraints={
-            "video": {
-                "width": {
-                    "min": width,
-                    "ideal": width,
-                    "max": width,
-                },
-            },
-            "audio": False,
-        },
-
-        video_receiver_size=1,
-        async_processing=True,
-    )
-    return ctx.video_receiver
-