Spaces:

onitsche
/

recognizer

Running

Oliver Nitsche Claude Sonnet 4.6 committed 28 days ago

Commit

6d06d8a

1 Parent(s): ee7b214

Fix face recognition: BGR→RGB, robust zip extraction, schema versioning

Two bugs made recognition impossible:

1. BGR→RGB: MobileFaceNet is trained on RGB images. _embed() was feeding
raw OpenCV BGR arrays, producing systematically wrong embeddings.
Fixed by converting the aligned 112×112 crop with
cv2.cvtColor(..., cv2.COLOR_BGR2RGB) before normalisation.

2. Zip entry path: w600k_mbf.onnx lives inside a buffalo_sc/ subdirectory
in the release zip, not at the root. The hardcoded entry name caused
extraction to fail (KeyError), so the model was never written to disk.
Fixed by searching the zip namelist for any entry ending in the filename.

Additional improvements:
- SCHEMA_VERSION (=3) in face_db.py: save() stores it under the "_schema" key,
and load() deletes face_db.json when the stored value differs, so stale BGR
embeddings from older code are discarded automatically on the first run
after upgrade.
- Threshold raised from 0.25 back to 0.35 (correct RGB embeddings are more consistent).
- Detection more lenient: minNeighbors 4→3, minSize 60→40.
- INFO/DEBUG logging of similarity scores for diagnosability.
- POST /clear_db endpoint + "Clear face database" button in the UI so the
user can force re-enrollment without SSH access.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (5) hide show

recognizer/face_db.py +60 -26
recognizer/main.py +7 -1
recognizer/static/index.html +5 -0
recognizer/static/main.js +14 -0
recognizer/static/style.css +15 -0

recognizer/face_db.py CHANGED Viewed

@@ -8,7 +8,8 @@ Alignment : eye-centre similarity transform to the InsightFace 112×112
 Matching  : cosine similarity on L2-normalised 512-D embeddings.
 Storage   : recognizer/face_db.json (gitignored).
-No compilation required — onnxruntime ships pre-built ARM64 wheels.
 """
 import json
@@ -24,6 +25,10 @@ import onnxruntime as ort
 logger = logging.getLogger(__name__)
 DB_PATH    = Path(__file__).parent / "face_db.json"
 MODEL_DIR  = Path(__file__).parent / "models"
 MODEL_FILE = MODEL_DIR / "w600k_mbf.onnx"
@@ -31,7 +36,6 @@ MODEL_URL  = (
     "https://github.com/deepinsight/insightface"
     "/releases/download/v0.7/buffalo_sc.zip"
 )
-_REC_ENTRY = "w600k_mbf.onnx"   # path inside the zip (root-level since buffalo_sc v0.7)
 _CASCADE = cv2.CascadeClassifier(
     cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
@@ -58,13 +62,20 @@ def _ensure_model() -> None:
         return
     MODEL_DIR.mkdir(exist_ok=True)
     zip_path = MODEL_DIR / "buffalo_sc.zip"
-    logger.info("Downloading face recognition model (~17 MB) — one-time setup...")
     urllib.request.urlretrieve(MODEL_URL, zip_path)
     with zipfile.ZipFile(zip_path) as zf:
-        with zf.open(_REC_ENTRY) as src, open(MODEL_FILE, "wb") as dst:
             dst.write(src.read())
     zip_path.unlink()
-    logger.info("Model ready at %s", MODEL_FILE)
 def _get_session() -> ort.InferenceSession:
@@ -80,22 +91,16 @@ def _get_session() -> ort.InferenceSession:
 def _detect(frame_bgr: np.ndarray) -> list[tuple[int, int, int, int]]:
     gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
     boxes = _CASCADE.detectMultiScale(
-        gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60)
     )
     return [tuple(b) for b in boxes] if len(boxes) > 0 else []
 def _align(face_bgr: np.ndarray) -> np.ndarray:
-    """Return a 112×112 crop aligned on eye centres; plain resize as fallback.
-    MobileFaceNet is trained on faces warped to a canonical eye position.
-    Without this step, embeddings from different frames of the same person
-    can be too dissimilar for reliable matching.
-    """
     gray = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2GRAY)
     eyes = _EYE_CASCADE.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=3)
     if len(eyes) >= 2:
-        # Pick the two largest detections and sort left-to-right
         eyes = sorted(eyes, key=lambda e: e[2] * e[3], reverse=True)[:2]
         eyes = sorted(eyes, key=lambda e: e[0])
         src = np.float32([
@@ -109,12 +114,16 @@ def _align(face_bgr: np.ndarray) -> np.ndarray:
 def _embed(face_bgr: np.ndarray) -> np.ndarray:
-    img = _align(face_bgr).astype(np.float32)
-    img = (img - 127.5) / 127.5
-    inp = np.transpose(img, (2, 0, 1))[np.newaxis]          # NCHW
     sess = _get_session()
     emb = sess.run(None, {sess.get_inputs()[0].name: inp})[0][0]
-    return emb / np.linalg.norm(emb)                        # L2-normalise
 # ---------------------------------------------------------------------------
@@ -122,25 +131,46 @@ def _embed(face_bgr: np.ndarray) -> np.ndarray:
 # ---------------------------------------------------------------------------
 def load() -> dict[str, list[list[float]]]:
-    """Load face DB from disk and warm up the ONNX session."""
-    _get_session()                  # triggers one-time model download
-    if DB_PATH.exists():
-        return json.loads(DB_PATH.read_text())
-    return {}
 def save(db: dict[str, list[list[float]]]) -> None:
-    DB_PATH.write_text(json.dumps(db, indent=2))
 def find_match(
     frame_bgr: np.ndarray,
     db: dict[str, list[list[float]]],
-    threshold: float = 0.25,
 ) -> Optional[str]:
     """Return matched name if recognised, None if face present but unknown.
-    Raises NoFaceDetected if no face appears in the image at all.
     """
     boxes = _detect(frame_bgr)
     if not boxes:
@@ -156,9 +186,13 @@ def find_match(
             if sim > best_sim:
                 best_sim, best_name = sim, name
     if best_name is not None and best_sim >= threshold:
         return best_name
-    return None   # face present but not recognised (or DB is empty)
 def add_face(

 Matching  : cosine similarity on L2-normalised 512-D embeddings.
 Storage   : recognizer/face_db.json (gitignored).
+Bump SCHEMA_VERSION whenever the embedding pipeline changes so that stale
+DB entries from older code are automatically discarded on load.
 """
 import json
 logger = logging.getLogger(__name__)
+# Bump this whenever the embedding pipeline changes (alignment, colour space,
+# model weights, normalisation, …). Mismatched DBs are auto-cleared on load.
+SCHEMA_VERSION = 3   # 1=plain-resize BGR  2=aligned BGR  3=aligned RGB
 DB_PATH    = Path(__file__).parent / "face_db.json"
 MODEL_DIR  = Path(__file__).parent / "models"
 MODEL_FILE = MODEL_DIR / "w600k_mbf.onnx"
     "https://github.com/deepinsight/insightface"
     "/releases/download/v0.7/buffalo_sc.zip"
 )
 _CASCADE = cv2.CascadeClassifier(
     cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
         return
     MODEL_DIR.mkdir(exist_ok=True)
     zip_path = MODEL_DIR / "buffalo_sc.zip"
+    logger.info("Downloading face recognition model (~17 MB) — one-time setup…")
     urllib.request.urlretrieve(MODEL_URL, zip_path)
     with zipfile.ZipFile(zip_path) as zf:
+        # The file may live at root or inside a named subdirectory (e.g. buffalo_sc/).
+        matches = [n for n in zf.namelist() if n.endswith("w600k_mbf.onnx")]
+        if not matches:
+            raise RuntimeError(
+                f"w600k_mbf.onnx not found in downloaded zip. "
+                f"Available entries: {zf.namelist()}"
+            )
+        with zf.open(matches[0]) as src, open(MODEL_FILE, "wb") as dst:
             dst.write(src.read())
     zip_path.unlink()
+    logger.info("Model saved to %s", MODEL_FILE)
 def _get_session() -> ort.InferenceSession:
 def _detect(frame_bgr: np.ndarray) -> list[tuple[int, int, int, int]]:
     gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
     boxes = _CASCADE.detectMultiScale(
+        gray, scaleFactor=1.1, minNeighbors=3, minSize=(40, 40)
     )
     return [tuple(b) for b in boxes] if len(boxes) > 0 else []
 def _align(face_bgr: np.ndarray) -> np.ndarray:
+    """Return a 112×112 crop aligned on eye centres; plain resize as fallback."""
     gray = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2GRAY)
     eyes = _EYE_CASCADE.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=3)
     if len(eyes) >= 2:
         eyes = sorted(eyes, key=lambda e: e[2] * e[3], reverse=True)[:2]
         eyes = sorted(eyes, key=lambda e: e[0])
         src = np.float32([
 def _embed(face_bgr: np.ndarray) -> np.ndarray:
+    """Return an L2-normalised 512-D embedding for face_bgr."""
+    face_112 = _align(face_bgr)
+    # MobileFaceNet (InsightFace) is trained on RGB — convert from OpenCV BGR.
+    face_rgb = cv2.cvtColor(face_112, cv2.COLOR_BGR2RGB)
+    img = face_rgb.astype(np.float32)
+    img = (img - 127.5) / 127.5          # normalise to [-1, 1]
+    inp = np.transpose(img, (2, 0, 1))[np.newaxis]   # NCHW
     sess = _get_session()
     emb = sess.run(None, {sess.get_inputs()[0].name: inp})[0][0]
+    return emb / np.linalg.norm(emb)     # L2-normalise
 # ---------------------------------------------------------------------------
 # ---------------------------------------------------------------------------
 def load() -> dict[str, list[list[float]]]:
+    """Load face DB from disk and warm up the ONNX session.
+    Returns an empty dict if the DB is missing or was produced by an older
+    embedding pipeline (schema mismatch → auto-clear).
+    """
+    _get_session()
+    if not DB_PATH.exists():
+        return {}
+    raw = json.loads(DB_PATH.read_text())
+    if raw.get("_schema") != SCHEMA_VERSION:
+        logger.warning(
+            "face_db schema mismatch (file=%s expected=%s) — clearing stale embeddings",
+            raw.get("_schema"), SCHEMA_VERSION,
+        )
+        DB_PATH.unlink()
+        return {}
+    return {k: v for k, v in raw.items() if not k.startswith("_")}
 def save(db: dict[str, list[list[float]]]) -> None:
+    out: dict = {"_schema": SCHEMA_VERSION}
+    out.update(db)
+    DB_PATH.write_text(json.dumps(out, indent=2))
+def wipe() -> None:
+    """Delete all enrolled faces from disk."""
+    if DB_PATH.exists():
+        DB_PATH.unlink()
+    logger.info("Face database cleared")
 def find_match(
     frame_bgr: np.ndarray,
     db: dict[str, list[list[float]]],
+    threshold: float = 0.35,
 ) -> Optional[str]:
     """Return matched name if recognised, None if face present but unknown.
+    Raises NoFaceDetected if no face is detected in the frame at all.
     """
     boxes = _detect(frame_bgr)
     if not boxes:
             if sim > best_sim:
                 best_sim, best_name = sim, name
+    logger.debug("Best match: %s  sim=%.3f  threshold=%.2f", best_name, best_sim, threshold)
     if best_name is not None and best_sim >= threshold:
+        logger.info("Recognised: %s (sim=%.3f)", best_name, best_sim)
         return best_name
+    if best_name is not None:
+        logger.info("Face detected but not recognised (best sim=%.3f < %.2f)", best_sim, threshold)
+    return None
 def add_face(

recognizer/main.py CHANGED Viewed

@@ -16,7 +16,7 @@ import numpy as np
 from pydantic import BaseModel
 from reachy_mini import ReachyMini, ReachyMiniApp
-from recognizer.face_db import NoFaceDetected, add_face, find_match
 from recognizer.face_db import load as load_face_db
 from recognizer.tts import speak
@@ -60,6 +60,12 @@ class Recognizer(ReachyMiniApp):
                     _shared["pending_name"] = payload.name.strip()
             return {"ok": True}
         @self.settings_app.get("/status")
         def get_status():
             with _lock:

 from pydantic import BaseModel
 from reachy_mini import ReachyMini, ReachyMiniApp
+from recognizer.face_db import NoFaceDetected, add_face, find_match, wipe as wipe_face_db
 from recognizer.face_db import load as load_face_db
 from recognizer.tts import speak
                     _shared["pending_name"] = payload.name.strip()
             return {"ok": True}
+        @self.settings_app.post("/clear_db")
+        def clear_db():
+            wipe_face_db()
+            face_db.clear()
+            return {"ok": True}
         @self.settings_app.get("/status")
         def get_status():
             with _lock:

recognizer/static/index.html CHANGED Viewed

@@ -28,6 +28,11 @@
         <div id="enroll-status"></div>
     </div>
     <script src="/static/main.js"></script>
 </body>

         <div id="enroll-status"></div>
     </div>
+    <div id="admin-section">
+        <button id="clear-db-btn" class="danger">🗑 Clear face database</button>
+        <div id="clear-db-status"></div>
+    </div>
     <script src="/static/main.js"></script>
 </body>

recognizer/static/main.js CHANGED Viewed

@@ -72,6 +72,20 @@ document.getElementById("name-input").addEventListener("keydown", (e) => {
     if (e.key === "Enter") submitName();
 });
 // Poll every second
 setInterval(pollStatus, 1000);
 pollStatus();

     if (e.key === "Enter") submitName();
 });
+async function clearDb() {
+    if (!confirm("Delete all enrolled faces? The robot will not recognise anyone until they enroll again.")) return;
+    try {
+        const resp = await fetch("/clear_db", { method: "POST" });
+        const data = await resp.json();
+        document.getElementById("clear-db-status").textContent =
+            data.ok ? "✓ Database cleared – please re-enroll." : "Error clearing database.";
+    } catch (e) {
+        document.getElementById("clear-db-status").textContent = "Error clearing database.";
+    }
+}
+document.getElementById("clear-db-btn").addEventListener("click", clearDb);
 // Poll every second
 setInterval(pollStatus, 1000);
 pollStatus();

recognizer/static/style.css CHANGED Viewed

@@ -81,6 +81,21 @@ button {
 button:hover { background: #1558b0; }
 #enroll-status {
     margin-top: 0.75rem;
     font-size: 0.9rem;

 button:hover { background: #1558b0; }
+button.danger { background: #c62828; }
+button.danger:hover { background: #8e0000; }
+#admin-section {
+    margin-top: 2rem;
+    padding-top: 1rem;
+    border-top: 1px solid #ddd;
+}
+#clear-db-status {
+    margin-top: 0.6rem;
+    font-size: 0.9rem;
+    color: #c62828;
+}
 #enroll-status {
     margin-top: 0.75rem;
     font-size: 0.9rem;