Duplicate from opencv/face_recognition_sface

Browse files

Co-authored-by: Abhishek Gola <abhishek-gola@users.noreply.huggingface.co>

Files changed (13) hide show

.gitattributes +26 -0
.gitignore +9 -0
CMakeLists.txt +11 -0
LICENSE +202 -0
README.md +68 -0
demo.cpp +322 -0
demo.py +157 -0
example_outputs/demo.jpg +3 -0
face_recognition_sface_2021dec.onnx +3 -0
face_recognition_sface_2021dec_int8.onnx +3 -0
face_recognition_sface_2021dec_int8bq.onnx +3 -0
sface.py +63 -0
yunet.py +55 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,26 @@

+# Caffe
+*.caffemodel filter=lfs diff=lfs merge=lfs -text
+# Tensorflow
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pbtxt filter=lfs diff=lfs merge=lfs -text
+# Torch
+*.t7 filter=lfs diff=lfs merge=lfs -text
+*.net filter=lfs diff=lfs merge=lfs -text
+# Darknet
+*.weights filter=lfs diff=lfs merge=lfs -text
+# ONNX
+*.onnx filter=lfs diff=lfs merge=lfs -text
+# NPY
+*.npy filter=lfs diff=lfs merge=lfs -text
+# Images
+*.jpg filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,9 @@

+*.pyc
+**/__pycache__
+**/__pycache__/**
+.vscode
+build/
+**/build
+**/build/**

CMakeLists.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+cmake_minimum_required(VERSION 3.24.0)
+project(opencv_zoo_face_recognition_sface)
+# OpenCV version requested from find_package() below.
+set(OPENCV_VERSION "4.9.0")
+# Optional install prefix supplied by the user, e.g.
+#   cmake -B build -D OPENCV_INSTALLATION_PATH=/usr/local .
+set(OPENCV_INSTALLATION_PATH "" CACHE PATH "Where to look for OpenCV installation")
+# Find OpenCV
+find_package(OpenCV ${OPENCV_VERSION} REQUIRED HINTS ${OPENCV_INSTALLATION_PATH})
+add_executable(demo demo.cpp)
+# Keyword signature: OpenCV is only needed to build the demo itself, so link
+# it PRIVATE instead of relying on the legacy keyword-less form.
+target_link_libraries(demo PRIVATE ${OpenCV_LIBS})

LICENSE ADDED Viewed

	@@ -0,0 +1,202 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright [yyyy] [name of copyright owner]
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

README.md ADDED Viewed

	@@ -0,0 +1,68 @@

+# SFace
+SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition
+Note:
+- SFace is contributed by [Yaoyao Zhong](https://github.com/zhongyy).
+- Model files encode MobileFaceNet instances trained on the SFace loss function, see the [SFace paper](https://arxiv.org/abs/2205.12010) for reference.
+- ONNX file conversions from [original code base](https://github.com/zhongyy/SFace) thanks to [Chengrui Wang](https://github.com/crywang).
+- (As of Sep 2021) Supporting 5-landmark warping for now, see below for details.
+- `face_recognition_sface_2021dec_int8bq.onnx` represents the block-quantized version in int8 precision and is generated using [block_quantize.py](../../tools/quantize/block_quantize.py) with `block_size=64`.
+Results of accuracy evaluation with [tools/eval](../../tools/eval).
+| Models      | Accuracy |
+| ----------- | -------- |
+| SFace       | 0.9940   |
+| SFace block | 0.9942   |
+| SFace quant | 0.9932   |
+\*: 'quant' stands for 'quantized'.
+\*\*: 'block' stands for 'blockwise quantized'.
+## Demo
+***NOTE***: This demo uses [../face_detection_yunet](../face_detection_yunet) as face detector, which supports 5-landmark detection for now (2021sep).
+Run the following command to try the demo:
+### Python
+```shell
+# recognize on images
+python demo.py --target /path/to/image1 --query /path/to/image2
+# get help regarding various parameters
+python demo.py --help
+```
+### C++
+Install latest OpenCV and CMake >= 3.24.0 to get started with:
+```shell
+# A typical and default installation path of OpenCV is /usr/local
+cmake -B build -D OPENCV_INSTALLATION_PATH=/path/to/opencv/installation .
+cmake --build build
+# recognize faces from camera input
+./build/demo -t=/path/to/target_face
+# recognize faces in an image
+./build/demo -t=/path/to/target_face -q=/path/to/query_face -v
+# get help messages
+./build/demo -h
+```
+### Example outputs
+![sface demo](./example_outputs/demo.jpg)
+Note: Left part of the image is the target identity, the right part is the query. Green boxes are the same identity, red boxes are different identities compared to the left.
+## License
+All files in this directory are licensed under [Apache 2.0 License](./LICENSE).
+## Reference
+- https://ieeexplore.ieee.org/document/9318547
+- https://github.com/zhongyy/SFace

demo.cpp ADDED Viewed

	@@ -0,0 +1,322 @@

+#include "opencv2/opencv.hpp"
+#include "opencv2/core/types.hpp"
+#include <string>
+#include <vector>
+// Supported (DNN backend, DNN target) combinations. The `backend_target`
+// command-line option is used as an index into this table.
+const std::vector<std::pair<int, int>> backend_target_pairs = {
+    {cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},       // 0: OpenCV implementation + CPU
+    {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},        // 1: CUDA + GPU (CUDA)
+    {cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16},   // 2: CUDA + GPU (CUDA FP16)
+    {cv::dnn::DNN_BACKEND_TIMVX, cv::dnn::DNN_TARGET_NPU},        // 3: TIM-VX + NPU
+    {cv::dnn::DNN_BACKEND_CANN, cv::dnn::DNN_TARGET_NPU}          // 4: CANN + NPU
+};
+// Thin wrapper around cv::FaceDetectorYN used as the face detector of this demo.
+class YuNet
+{
+  public:
+    // Creates the underlying detector from `model_path` (no separate config
+    // file is passed) and forwards all remaining settings unchanged.
+    YuNet(const std::string& model_path,
+          const cv::Size& input_size,
+          const float conf_threshold,
+          const float nms_threshold,
+          const int top_k,
+          const int backend_id,
+          const int target_id)
+        : _detector(cv::FaceDetectorYN::create(model_path,
+                                               "",
+                                               input_size,
+                                               conf_threshold,
+                                               nms_threshold,
+                                               top_k,
+                                               backend_id,
+                                               target_id))
+    {
+    }
+
+    // Forwards the expected input image size to the detector.
+    void setInputSize(const cv::Size& input_size)
+    {
+        _detector->setInputSize(input_size);
+    }
+
+    // Forwards the number of candidate boxes to keep to the detector.
+    void setTopK(const int top_k)
+    {
+        _detector->setTopK(top_k);
+    }
+
+    // Runs detection on `image` and returns the detector's output matrix
+    // (one row per detected face).
+    cv::Mat infer(const cv::Mat& image)
+    {
+        cv::Mat detections;
+        _detector->detect(image, detections);
+        return detections;
+    }
+
+  private:
+    cv::Ptr<cv::FaceDetectorYN> _detector;
+};
+// Thin wrapper around cv::FaceRecognizerSF: feature extraction plus a
+// thresholded same-identity decision.
+class SFace
+{
+  public:
+    // Creates the recognizer from `model_path` (no separate config file is
+    // passed). `distance_type` is cast to cv::FaceRecognizerSF::DisType.
+    SFace(const std::string& model_path,
+          const int backend_id,
+          const int target_id,
+          const int distance_type)
+        : _recognizer(cv::FaceRecognizerSF::create(model_path, "", backend_id, target_id)),
+          _distance_type(static_cast<cv::FaceRecognizerSF::DisType>(distance_type))
+    {
+    }
+
+    // Aligns and crops the face described by `face_image` (one detection row)
+    // out of `orig_image`, then returns a deep copy of its feature vector.
+    cv::Mat extractFeatures(const cv::Mat& orig_image, const cv::Mat& face_image)
+    {
+        cv::Mat aligned_face;
+        _recognizer->alignCrop(orig_image, face_image, aligned_face);
+
+        cv::Mat features;
+        _recognizer->feature(aligned_face, features);
+        return features.clone();
+    }
+
+    // Compares two feature vectors and returns {score, is_same_identity}.
+    // Cosine scores match when they reach the cosine threshold; for the other
+    // distance type a score at or below the norm-L2 threshold is a match.
+    std::pair<double, bool> matchFeatures(const cv::Mat& target_features, const cv::Mat& query_features)
+    {
+        const double score = _recognizer->match(target_features, query_features, _distance_type);
+        const bool uses_cosine = (_distance_type == cv::FaceRecognizerSF::DisType::FR_COSINE);
+        const bool same_identity = uses_cosine ? (score >= _threshold_cosine)
+                                               : (score <= _threshold_norml2);
+        return {score, same_identity};
+    }
+
+  private:
+    cv::Ptr<cv::FaceRecognizerSF> _recognizer;
+    cv::FaceRecognizerSF::DisType _distance_type;
+    // Decision thresholds applied in matchFeatures().
+    double _threshold_cosine = 0.363;
+    double _threshold_norml2 = 1.128;
+};
+// Renders detection boxes and match scores for the demo.
+//
+// image        source image (not modified; a new image is returned)
+// faces        detection matrix, one row per face; columns 0..3 are read as
+//              x, y, width, height
+// matches      {score, is_match} per face, index-aligned with `faces`
+// fps          when >= 0 the function runs in "camera" mode: the image keeps
+//              its size, only the first face is drawn and the FPS is overlaid
+// target_size  canvas size of the letterboxed output used when fps < 0
+//
+// In camera mode an empty `faces`/`matches` yields the frame with only the FPS
+// overlay. In letterbox mode `matches.at(i)` throws std::out_of_range if there
+// are fewer matches than faces.
+cv::Mat visualize(const cv::Mat& image,
+                  const cv::Mat& faces,
+                  const std::vector<std::pair<double, bool>>& matches,
+                  const float fps = -0.1F,
+                  const cv::Size& target_size = cv::Size(512, 512))
+{
+    static const cv::Scalar matched_box_color{0, 255, 0};
+    static const cv::Scalar mismatched_box_color{0, 0, 255};
+    // Draws one bounding box with its score and returns the color that was used.
+    const auto draw_match = [](cv::Mat& canvas, const cv::Rect& box, const std::pair<double, bool>& match) -> cv::Scalar
+    {
+        const cv::Scalar box_color = match.second ? matched_box_color : mismatched_box_color;
+        // Draw bounding box
+        cv::rectangle(canvas, box, box_color, 2);
+        // Draw match score
+        cv::putText(canvas, cv::format("%.4f", match.first), cv::Point(box.x, box.y + 12), cv::FONT_HERSHEY_DUPLEX, 0.30, box_color);
+        return box_color;
+    };
+    if (fps >= 0)
+    {
+        cv::Mat output_image = image.clone();
+        // With no face to draw, the FPS text falls back to the mismatch color.
+        cv::Scalar text_color = mismatched_box_color;
+        if (faces.rows > 0 && !matches.empty())
+        {
+            const cv::Rect box(static_cast<int>(faces.at<float>(0, 0)),
+                               static_cast<int>(faces.at<float>(0, 1)),
+                               static_cast<int>(faces.at<float>(0, 2)),
+                               static_cast<int>(faces.at<float>(0, 3)));
+            text_color = draw_match(output_image, box, matches.front());
+        }
+        // Draw FPS
+        cv::putText(output_image, cv::format("FPS: %.2f", fps), cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, text_color, 2);
+        return output_image;
+    }
+    cv::Mat output_image = cv::Mat::zeros(target_size, CV_8UC3);
+    // Determine new height and width of image with aspect ratio of original image
+    const double ratio = std::min(static_cast<double>(target_size.height) / image.rows,
+                                  static_cast<double>(target_size.width) / image.cols);
+    const int new_height = static_cast<int>(image.rows * ratio);
+    const int new_width = static_cast<int>(image.cols * ratio);
+    // Resize the original image, maintaining aspect ratio. The interpolation
+    // flag is the 6th argument of cv::resize; fx/fy stay 0 because the
+    // destination size is given explicitly.
+    cv::Mat resize_out;
+    cv::resize(image, resize_out, cv::Size(new_width, new_height), 0, 0, cv::INTER_LINEAR);
+    // Determine top left corner in resized dimensions
+    const int top = std::max(0, target_size.height - new_height) / 2;
+    const int left = std::max(0, target_size.width - new_width) / 2;
+    // Copy resized image into target output image
+    const cv::Rect roi = cv::Rect(cv::Point(left, top), cv::Size(new_width, new_height));
+    cv::Mat out_sub_image = output_image(roi);
+    resize_out.copyTo(out_sub_image);
+    for (int i = 0; i < faces.rows; ++i)
+    {
+        // Map the detection from source-image coordinates onto the letterboxed canvas
+        const cv::Rect box(static_cast<int>(faces.at<float>(i, 0) * ratio) + left,
+                           static_cast<int>(faces.at<float>(i, 1) * ratio) + top,
+                           static_cast<int>(faces.at<float>(i, 2) * ratio),
+                           static_cast<int>(faces.at<float>(i, 3) * ratio));
+        draw_match(output_image, box, matches.at(i));
+    }
+    return output_image;
+}
+// Demo entry point: detects the face in the target image with YuNet, then
+// compares it with SFace against every face of a query image, or against the
+// first face of each camera frame when no query image is given.
+// Invalid arguments, unreadable images and a target image without a face are
+// reported through CV_Error.
+int main(int argc, char** argv)
+{
+    cv::CommandLineParser parser(argc, argv,
+        // General options
+        "{help  h           |                                     | Print this message}"
+        "{backend_target b  | 0                                   | Set DNN backend target pair:\n"
+                                                                   "0: (default) OpenCV implementation + CPU,\n"
+                                                                   "1: CUDA + GPU (CUDA),\n"
+                                                                   "2: CUDA + GPU (CUDA FP16),\n"
+                                                                   "3: TIM-VX + NPU,\n"
+                                                                   "4: CANN + NPU}"
+        "{save s            | false                               | Whether to save result image or not}"
+        "{vis v             | false                               | Whether to visualize result image or not}"
+        // SFace options
+        "{target_face t     |                                     | Set path to input image 1 (target face)}"
+        "{query_face q      |                                     | Set path to input image 2 (query face), omit if using camera}"
+        "{model m           | face_recognition_sface_2021dec.onnx | Set path to the model}"
+        "{distance_type d   | 0                                   | 0 = cosine, 1 = norm_l2}"
+        // YuNet options
+        "{yunet_model       | ../face_detection_yunet/face_detection_yunet_2023mar.onnx | Set path to the YuNet model}"
+        "{detect_threshold  | 0.9                                                       | Set the minimum confidence for the model\n"
+                                                                                         "to identify a face. Filter out faces of\n"
+                                                                                         "conf < conf_threshold}"
+        "{nms_threshold     | 0.3                                                       | Set the threshold to suppress overlapped boxes.\n"
+                                                                                         "Suppress boxes if IoU(box1, box2) >= nms_threshold\n"
+                                                                                         ", the one of higher score is kept.}"
+        "{top_k             | 5000                                                      | Keep top_k bounding boxes before NMS}"
+    );
+    if (parser.has("help"))
+    {
+        parser.printMessage();
+        return 0;
+    }
+    // General CLI options
+    const int backend = parser.get<int>("backend_target");
+    const bool save_flag = parser.get<bool>("save");
+    const bool vis_flag = parser.get<bool>("vis");
+    // Reject indices outside the backend/target table with a readable message
+    // before the table is indexed.
+    const int backend_count = static_cast<int>(backend_target_pairs.size());
+    if (backend < 0 || backend >= backend_count)
+    {
+        CV_Error(cv::Error::StsOutOfRange,
+                 cv::format("Invalid backend_target %d, expected a value in [0, %d]", backend, backend_count - 1));
+    }
+    const int backend_id = backend_target_pairs.at(backend).first;
+    const int target_id = backend_target_pairs.at(backend).second;
+    // YuNet CLI options
+    const std::string detector_model_path = parser.get<std::string>("yunet_model");
+    const float detect_threshold = parser.get<float>("detect_threshold");
+    const float nms_threshold = parser.get<float>("nms_threshold");
+    const int top_k = parser.get<int>("top_k");
+    // Use YuNet as the detector backend
+    auto face_detector = YuNet(
+        detector_model_path, cv::Size(320, 320), detect_threshold, nms_threshold, top_k, backend_id, target_id);
+    // SFace CLI options
+    const std::string target_path = parser.get<std::string>("target_face");
+    const std::string query_path = parser.get<std::string>("query_face");
+    const std::string model_path = parser.get<std::string>("model");
+    const int distance_type = parser.get<int>("distance_type");
+    // Only the two documented distance types are meaningful for the recognizer.
+    if (distance_type != cv::FaceRecognizerSF::DisType::FR_COSINE &&
+        distance_type != cv::FaceRecognizerSF::DisType::FR_NORM_L2)
+    {
+        CV_Error(cv::Error::StsBadArg,
+                 cv::format("Invalid distance_type %d, expected 0 (cosine) or 1 (norm_l2)", distance_type));
+    }
+    auto face_recognizer = SFace(model_path, backend_id, target_id, distance_type);
+    if (target_path.empty())
+    {
+        CV_Error(cv::Error::StsError, "Path to target image not provided, use -t=/path/to/target_face");
+    }
+    cv::Mat target_image = cv::imread(target_path);
+    if (target_image.empty())
+    {
+        CV_Error(cv::Error::StsError, "Could not read target image " + target_path);
+    }
+    // Detect single face in target image
+    face_detector.setInputSize(target_image.size());
+    face_detector.setTopK(1);
+    cv::Mat target_face = face_detector.infer(target_image);
+    if (target_face.rows < 1)
+    {
+        CV_Error(cv::Error::StsError, "No face detected in target image " + target_path);
+    }
+    // Extract features from target face
+    cv::Mat target_features = face_recognizer.extractFeatures(target_image, target_face.row(0));
+    if (!query_path.empty()) // use image
+    {
+        // Detect any faces in query image
+        cv::Mat query_image = cv::imread(query_path);
+        if (query_image.empty())
+        {
+            CV_Error(cv::Error::StsError, "Could not read query image " + query_path);
+        }
+        face_detector.setInputSize(query_image.size());
+        // Restore the user-selected top_k after the single-face target pass
+        face_detector.setTopK(top_k);
+        cv::Mat query_faces = face_detector.infer(query_image);
+        // Store match scores for visualization
+        std::vector<std::pair<double, bool>> matches;
+        for (int i = 0; i < query_faces.rows; ++i)
+        {
+            // Extract features from query face
+            cv::Mat query_features = face_recognizer.extractFeatures(query_image, query_faces.row(i));
+            // Measure similarity of target face to query face
+            const auto match = face_recognizer.matchFeatures(target_features, query_features);
+            matches.push_back(match);
+            const int x1 = static_cast<int>(query_faces.at<float>(i, 0));
+            const int y1 = static_cast<int>(query_faces.at<float>(i, 1));
+            const int w = static_cast<int>(query_faces.at<float>(i, 2));
+            const int h = static_cast<int>(query_faces.at<float>(i, 3));
+            const float conf = query_faces.at<float>(i, 14);
+            std::cout << cv::format("%d: x1=%d, y1=%d, w=%d, h=%d, conf=%.4f, match=%.4f\n", i, x1, y1, w, h, conf, match.first);
+        }
+        if (save_flag || vis_flag)
+        {
+            auto vis_target = visualize(target_image, target_face, {{1.0, true}});
+            auto vis_query = visualize(query_image, query_faces, matches);
+            cv::Mat output_image;
+            cv::hconcat(vis_target, vis_query, output_image);
+            if (save_flag)
+            {
+                std::cout << "Results are saved to result.jpg\n";
+                cv::imwrite("result.jpg", output_image);
+            }
+            if (vis_flag)
+            {
+                cv::namedWindow(query_path, cv::WINDOW_AUTOSIZE);
+                cv::imshow(query_path, output_image);
+                cv::waitKey(0);
+            }
+        }
+    }
+    else // use video capture
+    {
+        const int device_id = 0;
+        auto cap = cv::VideoCapture(device_id);
+        if (!cap.isOpened())
+        {
+            CV_Error(cv::Error::StsError, cv::format("Could not open camera device %d", device_id));
+        }
+        const int w = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
+        const int h = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
+        face_detector.setInputSize(cv::Size(w, h));
+        // The target panel does not change between frames, so render it once.
+        const cv::Mat vis_target = visualize(target_image, target_face, {{1.0, true}}, -0.1F, cv::Size(w, h));
+        auto tick_meter = cv::TickMeter();
+        cv::Mat query_frame;
+        while (cv::waitKey(1) < 0)
+        {
+            bool has_frame = cap.read(query_frame);
+            if (!has_frame)
+            {
+                std::cout << "No frames grabbed! Exiting ...\n";
+                break;
+            }
+            tick_meter.start();
+            // Detect faces from webcam image
+            cv::Mat query_faces = face_detector.infer(query_frame);
+            tick_meter.stop();
+            cv::Mat vis_query;
+            if (query_faces.rows > 0)
+            {
+                // Extract features from query face
+                cv::Mat query_features = face_recognizer.extractFeatures(query_frame, query_faces.row(0));
+                // Measure similarity of target face to query face
+                const auto match = face_recognizer.matchFeatures(target_features, query_features);
+                const auto fps = static_cast<float>(tick_meter.getFPS());
+                vis_query = visualize(query_frame, query_faces, {match}, fps);
+            }
+            else
+            {
+                // No face in this frame: show it unannotated instead of
+                // indexing an empty detection matrix.
+                vis_query = query_frame;
+            }
+            cv::Mat output_image;
+            cv::hconcat(vis_target, vis_query, output_image);
+            // Visualize in a new window
+            cv::imshow("SFace Demo", output_image);
+            tick_meter.reset();
+        }
+    }
+    return 0;
+}

demo.py ADDED Viewed

	@@ -0,0 +1,157 @@

+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+import sys
+import argparse
+import numpy as np
+import cv2 as cv
+from huggingface_hub import hf_hub_download
+# Check OpenCV version
+opencv_python_version = lambda str_version: tuple(map(int, (str_version.split("."))))
+assert opencv_python_version(cv.__version__) >= opencv_python_version("4.10.0"), \
+       "Please install latest opencv-python for benchmark: python3 -m pip install --upgrade opencv-python"
+from sface import SFace
+from yunet import YuNet
+yunet_model_path = hf_hub_download(repo_id="opencv/face_detection_yunet", filename="face_detection_yunet_2023mar.onnx")
+# Valid combinations of backends and targets
+backend_target_pairs = [
+    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
+    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA],
+    [cv.dnn.DNN_BACKEND_CUDA,   cv.dnn.DNN_TARGET_CUDA_FP16],
+    [cv.dnn.DNN_BACKEND_TIMVX,  cv.dnn.DNN_TARGET_NPU],
+    [cv.dnn.DNN_BACKEND_CANN,   cv.dnn.DNN_TARGET_NPU]
+]
+parser = argparse.ArgumentParser(
+    description="SFace: Sigmoid-Constrained Hypersphere Loss for Robust Face Recognition (https://ieeexplore.ieee.org/document/9318547)")
+parser.add_argument('--target', '-t', type=str,
+                    help='Usage: Set path to the input image 1 (target face).')
+parser.add_argument('--query', '-q', type=str,
+                    help='Usage: Set path to the input image 2 (query).')
+parser.add_argument('--model', '-m', type=str, default='face_recognition_sface_2021dec.onnx',
+                    help='Usage: Set model path, defaults to face_recognition_sface_2021dec.onnx.')
+parser.add_argument('--backend_target', '-bt', type=int, default=0,
+                    help='''Choose one of the backend-target pair to run this demo:
+                        {:d}: (default) OpenCV implementation + CPU,
+                        {:d}: CUDA + GPU (CUDA),
+                        {:d}: CUDA + GPU (CUDA FP16),
+                        {:d}: TIM-VX + NPU,
+                        {:d}: CANN + NPU
+                    '''.format(*[x for x in range(len(backend_target_pairs))]))
+parser.add_argument('--dis_type', type=int, choices=[0, 1], default=0,
+                    help='Usage: Distance type. \'0\': cosine, \'1\': norm_l1. Defaults to \'0\'')
+parser.add_argument('--save', '-s', action='store_true',
+                    help='Usage: Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.')
+parser.add_argument('--vis', '-v', action='store_true',
+                    help='Usage: Specify to open a new window to show results. Invalid in case of camera input.')
+args = parser.parse_args()
+def visualize(img1, faces1, img2, faces2, matches, scores, target_size=[512, 512]): # target_size: (h, w)
+    out1 = img1.copy()
+    out2 = img2.copy()
+    matched_box_color = (0, 255, 0)    # BGR
+    mismatched_box_color = (0, 0, 255) # BGR
+    # Resize to 256x256 with the same aspect ratio
+    padded_out1 = np.zeros((target_size[0], target_size[1], 3)).astype(np.uint8)
+    h1, w1, _ = out1.shape
+    ratio1 = min(target_size[0] / out1.shape[0], target_size[1] / out1.shape[1])
+    new_h1 = int(h1 * ratio1)
+    new_w1 = int(w1 * ratio1)
+    resized_out1 = cv.resize(out1, (new_w1, new_h1), interpolation=cv.INTER_LINEAR).astype(np.float32)
+    top = max(0, target_size[0] - new_h1) // 2
+    bottom = top + new_h1
+    left = max(0, target_size[1] - new_w1) // 2
+    right = left + new_w1
+    padded_out1[top : bottom, left : right] = resized_out1
+    # Draw bbox
+    bbox1 = faces1[0][:4] * ratio1
+    x, y, w, h = bbox1.astype(np.int32)
+    cv.rectangle(padded_out1, (x + left, y + top), (x + left + w, y + top + h), matched_box_color, 2)
+    # Resize to 256x256 with the same aspect ratio
+    padded_out2 = np.zeros((target_size[0], target_size[1], 3)).astype(np.uint8)
+    h2, w2, _ = out2.shape
+    ratio2 = min(target_size[0] / out2.shape[0], target_size[1] / out2.shape[1])
+    new_h2 = int(h2 * ratio2)
+    new_w2 = int(w2 * ratio2)
+    resized_out2 = cv.resize(out2, (new_w2, new_h2), interpolation=cv.INTER_LINEAR).astype(np.float32)
+    top = max(0, target_size[0] - new_h2) // 2
+    bottom = top + new_h2
+    left = max(0, target_size[1] - new_w2) // 2
+    right = left + new_w2
+    padded_out2[top : bottom, left : right] = resized_out2
+    # Draw bbox
+    assert faces2.shape[0] == len(matches), "number of faces2 needs to match matches"
+    assert len(matches) == len(scores), "number of matches needs to match number of scores"
+    for index, match in enumerate(matches):
+        bbox2 = faces2[index][:4] * ratio2
+        x, y, w, h = bbox2.astype(np.int32)
+        box_color = matched_box_color if match else mismatched_box_color
+        cv.rectangle(padded_out2, (x + left, y + top), (x + left + w, y + top + h), box_color, 2)
+        score = scores[index]
+        text_color = matched_box_color if match else mismatched_box_color
+        cv.putText(padded_out2, "{:.2f}".format(score), (x + left, y + top - 5), cv.FONT_HERSHEY_DUPLEX, 0.4, text_color)
+    return np.concatenate([padded_out1, padded_out2], axis=1)
+if __name__ == '__main__':
+    backend_id = backend_target_pairs[args.backend_target][0]
+    target_id = backend_target_pairs[args.backend_target][1]
+    # Instantiate SFace for face recognition
+    recognizer = SFace(modelPath=args.model,
+                       disType=args.dis_type,
+                       backendId=backend_id,
+                       targetId=target_id)
+    # Instantiate YuNet for face detection
+    detector = YuNet(modelPath=yunet_model_path,
+                     inputSize=[320, 320],
+                     confThreshold=0.9,
+                     nmsThreshold=0.3,
+                     topK=5000,
+                     backendId=backend_id,
+                     targetId=target_id)
+    img1 = cv.imread(args.target)
+    img2 = cv.imread(args.query)
+    # Detect faces
+    detector.setInputSize([img1.shape[1], img1.shape[0]])
+    faces1 = detector.infer(img1)
+    assert faces1.shape[0] > 0, 'Cannot find a face in {}'.format(args.target)
+    detector.setInputSize([img2.shape[1], img2.shape[0]])
+    faces2 = detector.infer(img2)
+    assert faces2.shape[0] > 0, 'Cannot find a face in {}'.format(args.query)
+    # Match
+    scores = []
+    matches = []
+    for face in faces2:
+        result = recognizer.match(img1, faces1[0][:-1], img2, face[:-1])
+        scores.append(result[0])
+        matches.append(result[1])
+    # Draw results
+    image = visualize(img1, faces1, img2, faces2, matches, scores)
+    # Save results if save is true
+    if args.save:
+        print('Resutls saved to result.jpg\n')
+        cv.imwrite('result.jpg', image)
+    # Visualize results in a new window
+    if args.vis:
+        cv.namedWindow("SFace Demo", cv.WINDOW_AUTOSIZE)
+        cv.imshow("SFace Demo", image)
+        cv.waitKey(0)

example_outputs/demo.jpg ADDED Viewed

Git LFS Details

SHA256: 0f879881a598fea6fec74e047e6a1d00e36d81de63bf0ed392b628e6ab6c2fc4
Pointer size: 131 Bytes
Size of remote file: 156 kB

face_recognition_sface_2021dec.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ba9fbfa01b5270c96627c4ef784da859931e02f04419c829e83484087c34e79
+size 38696353

face_recognition_sface_2021dec_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b0e941e6f16cc048c20aee0c8e31f569118f65d702914540f7bfdc14048d78a
+size 9896933

face_recognition_sface_2021dec_int8bq.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb143eea07838aa532d1c95df5f69899974ea0140e1fba05e94204be13ed74ee
+size 10667852

sface.py ADDED Viewed

	@@ -0,0 +1,63 @@

+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+import numpy as np
+import cv2 as cv
+class SFace:
+    def __init__(self, modelPath, disType=0, backendId=0, targetId=0):
+        self._modelPath = modelPath
+        self._backendId = backendId
+        self._targetId = targetId
+        self._model = cv.FaceRecognizerSF.create(
+            model=self._modelPath,
+            config="",
+            backend_id=self._backendId,
+            target_id=self._targetId)
+        self._disType = disType # 0: cosine similarity, 1: Norm-L2 distance
+        assert self._disType in [0, 1], "0: Cosine similarity, 1: norm-L2 distance, others: invalid"
+        self._threshold_cosine = 0.363
+        self._threshold_norml2 = 1.128
+    @property
+    def name(self):
+        return self.__class__.__name__
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
+        self._model = cv.FaceRecognizerSF.create(
+            model=self._modelPath,
+            config="",
+            backend_id=self._backendId,
+            target_id=self._targetId)
+    def _preprocess(self, image, bbox):
+        if bbox is None:
+            return image
+        else:
+            return self._model.alignCrop(image, bbox)
+    def infer(self, image, bbox=None):
+        # Preprocess
+        inputBlob = self._preprocess(image, bbox)
+        # Forward
+        features = self._model.feature(inputBlob)
+        return features
+    def match(self, image1, face1, image2, face2):
+        feature1 = self.infer(image1, face1)
+        feature2 = self.infer(image2, face2)
+        if self._disType == 0: # COSINE
+            cosine_score = self._model.match(feature1, feature2, self._disType)
+            return cosine_score, 1 if cosine_score >= self._threshold_cosine else 0
+        else: # NORM_L2
+            norml2_distance = self._model.match(feature1, feature2, self._disType)
+            return norml2_distance, 1 if norml2_distance <= self._threshold_norml2 else 0

yunet.py ADDED Viewed

	@@ -0,0 +1,55 @@

+# This file is part of OpenCV Zoo project.
+# It is subject to the license terms in the LICENSE file found in the same directory.
+#
+# Copyright (C) 2021, Shenzhen Institute of Artificial Intelligence and Robotics for Society, all rights reserved.
+# Third party copyrights are property of their respective owners.
+from itertools import product
+import numpy as np
+import cv2 as cv
+class YuNet:
+    def __init__(self, modelPath, inputSize=[320, 320], confThreshold=0.6, nmsThreshold=0.3, topK=5000, backendId=0, targetId=0):
+        self._modelPath = modelPath
+        self._inputSize = tuple(inputSize) # [w, h]
+        self._confThreshold = confThreshold
+        self._nmsThreshold = nmsThreshold
+        self._topK = topK
+        self._backendId = backendId
+        self._targetId = targetId
+        self._model = cv.FaceDetectorYN.create(
+            model=self._modelPath,
+            config="",
+            input_size=self._inputSize,
+            score_threshold=self._confThreshold,
+            nms_threshold=self._nmsThreshold,
+            top_k=self._topK,
+            backend_id=self._backendId,
+            target_id=self._targetId)
+    @property
+    def name(self):
+        return self.__class__.__name__
+    def setBackendAndTarget(self, backendId, targetId):
+        self._backendId = backendId
+        self._targetId = targetId
+        self._model = cv.FaceDetectorYN.create(
+            model=self._modelPath,
+            config="",
+            input_size=self._inputSize,
+            score_threshold=self._confThreshold,
+            nms_threshold=self._nmsThreshold,
+            top_k=self._topK,
+            backend_id=self._backendId,
+            target_id=self._targetId)
+    def setInputSize(self, input_size):
+        self._model.setInputSize(tuple(input_size))
+    def infer(self, image):
+        # Forward
+        faces = self._model.detect(image)
+        return np.empty(shape=(0, 5)) if faces[1] is None else faces[1]