Spaces:
Running
Running
Oliver Nitsche Claude Sonnet 4.6 committed on
Commit ·
2fda523
1
Parent(s): 77c6ffa
Switch to local ONNX face recognition (no compilation, no cloud)
Browse files
Replaces AWS Rekognition with onnxruntime + OpenCV Haar cascade +
InsightFace MobileFaceNet (w600k_mbf.onnx, ~17 MB downloaded once on
first run). Both onnxruntime and opencv-python-headless ship pre-built
ARM64 wheels so pip install is fast with no compilation step. Face
embeddings are stored locally in face_db.json as before.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- .gitignore +3 -1
- CLAUDE.md +7 -14
- pyproject.toml +2 -2
- recognizer/face_db.py +122 -58
- recognizer/main.py +5 -5
.gitignore
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
__pycache__/
|
| 2 |
*.egg-info/
|
| 3 |
-
build/
|
|
|
|
|
|
|
|
|
| 1 |
__pycache__/
|
| 2 |
*.egg-info/
|
| 3 |
+
build/
|
| 4 |
+
recognizer/models/
|
| 5 |
+
recognizer/face_db.json
|
CLAUDE.md
CHANGED
|
@@ -22,21 +22,14 @@ pip install -e .
|
|
| 22 |
sudo apt-get install espeak-ng # text-to-speech synthesis
|
| 23 |
```
|
| 24 |
|
| 25 |
-
###
|
| 26 |
|
| 27 |
-
Face recognition
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
export AWS_ACCESS_KEY_ID=...
|
| 32 |
-
export AWS_SECRET_ACCESS_KEY=...
|
| 33 |
-
export AWS_DEFAULT_REGION=us-east-1 # or your preferred region
|
| 34 |
-
```
|
| 35 |
-
|
| 36 |
-
Or use `aws configure` if the AWS CLI is installed. The app auto-creates a
|
| 37 |
-
Rekognition collection named `reachy-mini-recognizer` on first run.
|
| 38 |
-
The IAM user/role needs: `rekognition:CreateCollection`,
|
| 39 |
-
`rekognition:IndexFaces`, `rekognition:SearchFacesByImage`.
|
| 40 |
|
| 41 |
## Running the App
|
| 42 |
|
|
@@ -87,7 +80,7 @@ SLEEPING →(speech detected × 3)→ WAKING → ACTIVE → SLEEPING
|
|
| 87 |
- **ENROLLING**: robot has detected an unrecognised face; waits for name to be submitted via the web UI (`POST /set_name`). Stores encoding in `face_db.json`, says "Nice to meet you, <name>!", then sleeps.
|
| 88 |
|
| 89 |
**Helper modules**:
|
| 90 |
-
- `recognizer/face_db.py` —
|
| 91 |
- `recognizer/tts.py` — synthesises text via `espeak-ng -s 140 -w <tmp.wav>`, plays via `media.play_sound()`, then sleeps to let playback finish.
|
| 92 |
|
| 93 |
**Settings UI** (`recognizer/static/`):
|
|
|
|
| 22 |
sudo apt-get install espeak-ng # text-to-speech synthesis
|
| 23 |
```
|
| 24 |
|
| 25 |
+
### Face recognition model (one-time download)
|
| 26 |
|
| 27 |
+
Face recognition runs **locally** using ONNX Runtime (no cloud account needed).
|
| 28 |
+
On first run the app downloads the InsightFace MobileFaceNet model (~17 MB)
|
| 29 |
+
from GitHub and caches it at `recognizer/models/w600k_mbf.onnx`.
|
| 30 |
+
Requires internet access the first time only; fully offline thereafter.
|
| 31 |
|
| 32 |
+
Requires **64-bit Raspberry Pi OS** (onnxruntime ships pre-built aarch64 wheels).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
## Running the App
|
| 35 |
|
|
|
|
| 80 |
- **ENROLLING**: robot has detected an unrecognised face; waits for name to be submitted via the web UI (`POST /set_name`). Stores encoding in `face_db.json`, says "Nice to meet you, <name>!", then sleeps.
|
| 81 |
|
| 82 |
**Helper modules**:
|
| 83 |
+
- `recognizer/face_db.py` — local face recognition via ONNX Runtime. `load()` warms up the ONNX session (downloads model on first run) and returns the embedding DB dict. `find_match(frame_bgr, db)` detects with OpenCV Haar cascade, embeds with MobileFaceNet, matches by cosine similarity (threshold 0.35); raises `NoFaceDetected` if no face. `add_face(name, frame_bgr, db)` enrolls a face. DB stored in `recognizer/face_db.json`.
|
| 84 |
- `recognizer/tts.py` — synthesises text via `espeak-ng -s 140 -w <tmp.wav>`, plays via `media.play_sound()`, then sleeps to let playback finish.
|
| 85 |
|
| 86 |
**Settings UI** (`recognizer/static/`):
|
pyproject.toml
CHANGED
|
@@ -11,8 +11,8 @@ readme = "README.md"
|
|
| 11 |
requires-python = ">=3.10"
|
| 12 |
dependencies = [
|
| 13 |
"reachy-mini",
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
"scipy",
|
| 17 |
]
|
| 18 |
keywords = ["reachy-mini-app", "reachy-mini"]
|
|
|
|
| 11 |
requires-python = ">=3.10"
|
| 12 |
dependencies = [
|
| 13 |
"reachy-mini",
|
| 14 |
+
"onnxruntime",
|
| 15 |
+
"opencv-python-headless",
|
| 16 |
"scipy",
|
| 17 |
]
|
| 18 |
keywords = ["reachy-mini-app", "reachy-mini"]
|
recognizer/face_db.py
CHANGED
|
@@ -1,86 +1,150 @@
|
|
| 1 |
-
"""Face database:
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
"""
|
| 6 |
|
| 7 |
-
import
|
| 8 |
import logging
|
|
|
|
|
|
|
|
|
|
| 9 |
from typing import Optional
|
| 10 |
|
| 11 |
-
import
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
|
| 20 |
class NoFaceDetected(Exception):
|
| 21 |
"""Raised when no face is found in the provided image."""
|
| 22 |
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
Image.fromarray(rgb).save(buf, format="JPEG")
|
| 32 |
-
return buf.getvalue()
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
try:
|
| 39 |
-
client.create_collection(CollectionId=COLLECTION_ID)
|
| 40 |
-
logger.info("Created Rekognition collection '%s'", COLLECTION_ID)
|
| 41 |
-
except client.exceptions.ResourceAlreadyExistsException:
|
| 42 |
-
pass
|
| 43 |
-
return COLLECTION_ID
|
| 44 |
|
| 45 |
|
| 46 |
def find_match(
|
| 47 |
-
frame_bgr,
|
| 48 |
-
|
| 49 |
-
threshold: float =
|
| 50 |
) -> Optional[str]:
|
| 51 |
-
"""
|
| 52 |
|
| 53 |
-
|
| 54 |
-
unknown. Raises NoFaceDetected if no face appears in the image at all.
|
| 55 |
"""
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
resp = client.search_faces_by_image(
|
| 59 |
-
CollectionId=collection_id,
|
| 60 |
-
Image={"Bytes": _to_jpeg(frame_bgr)},
|
| 61 |
-
FaceMatchThreshold=threshold,
|
| 62 |
-
MaxFaces=1,
|
| 63 |
-
)
|
| 64 |
-
matches = resp.get("FaceMatches", [])
|
| 65 |
-
if matches:
|
| 66 |
-
return matches[0]["Face"]["ExternalImageId"]
|
| 67 |
-
return None # face detected but not in collection
|
| 68 |
-
except client.exceptions.InvalidParameterException:
|
| 69 |
raise NoFaceDetected()
|
| 70 |
-
except (BotoCoreError, ClientError) as exc:
|
| 71 |
-
logger.warning("Rekognition error: %s", exc)
|
| 72 |
-
raise NoFaceDetected()
|
| 73 |
-
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
if not
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
raise ValueError("No face detected in enrollment image")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Face database: local face recognition via ONNX + OpenCV.
|
| 2 |
|
| 3 |
+
Detection : OpenCV Haar cascade (built into opencv, no download).
|
| 4 |
+
Embedding : InsightFace MobileFaceNet (w600k_mbf.onnx, ~17 MB, downloaded
|
| 5 |
+
once on first run from the InsightFace GitHub release).
|
| 6 |
+
Matching : cosine similarity on L2-normalised 512-D embeddings.
|
| 7 |
+
Storage : recognizer/face_db.json (gitignored).
|
| 8 |
+
|
| 9 |
+
No compilation required — onnxruntime ships pre-built ARM64 wheels.
|
| 10 |
"""
|
| 11 |
|
| 12 |
+
import json
|
| 13 |
import logging
|
| 14 |
+
import urllib.request
|
| 15 |
+
import zipfile
|
| 16 |
+
from pathlib import Path
|
| 17 |
from typing import Optional
|
| 18 |
|
| 19 |
+
import cv2
|
| 20 |
+
import numpy as np
|
| 21 |
+
import onnxruntime as ort
|
| 22 |
|
| 23 |
logger = logging.getLogger(__name__)
|
| 24 |
|
| 25 |
+
# --- On-disk locations, all siblings of this module file -------------------
# JSON file read by load() and written by save().
DB_PATH = Path(__file__).with_name("face_db.json")
# Directory that holds the downloaded recognition model.
MODEL_DIR = Path(__file__).with_name("models")
MODEL_FILE = MODEL_DIR.joinpath("w600k_mbf.onnx")

# --- Model download --------------------------------------------------------
# Zip archive fetched by _ensure_model() when MODEL_FILE is missing.
MODEL_URL = (
    "https://github.com/deepinsight/insightface"
    "/releases/download/v0.7/buffalo_sc.zip"
)
# Archive member that is extracted to MODEL_FILE.
_REC_ENTRY = "buffalo_sc/w600k_mbf.onnx"

# --- Runtime singletons ----------------------------------------------------
# Frontal-face Haar cascade from the data directory bundled with OpenCV.
_CASCADE = cv2.CascadeClassifier(
    cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
)
# Created lazily by _get_session(); stays None until first use.
_session: Optional[ort.InferenceSession] = None
|
| 38 |
|
| 39 |
|
| 40 |
class NoFaceDetected(Exception):
|
| 41 |
"""Raised when no face is found in the provided image."""
|
| 42 |
|
| 43 |
|
| 44 |
+
# ---------------------------------------------------------------------------
|
| 45 |
+
# Internal helpers
|
| 46 |
+
# ---------------------------------------------------------------------------
|
| 47 |
+
|
| 48 |
+
def _ensure_model() -> None:
    """Make sure the recognition model exists at ``MODEL_FILE``.

    Returns immediately when the file is already present. Otherwise the
    archive at ``MODEL_URL`` is downloaded into ``MODEL_DIR``, the
    ``_REC_ENTRY`` member is extracted, and the archive is deleted again.

    The member is extracted to a temporary ``.part`` file and only renamed
    to ``MODEL_FILE`` once it has been written completely. An interrupted
    download or extraction therefore never leaves a truncated model behind
    that a later run would mistake for a valid one.

    Raises OSError on download or filesystem failures, zipfile.BadZipFile
    if the download is not a valid archive, and KeyError if the archive
    does not contain ``_REC_ENTRY``.
    """
    import shutil  # local import: only needed on the one-time download path

    if MODEL_FILE.exists():
        return

    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    zip_path = MODEL_DIR / "buffalo_sc.zip"
    partial_model = MODEL_FILE.with_name(MODEL_FILE.name + ".part")
    logger.info("Downloading face recognition model (~17 MB) — one-time setup...")
    try:
        urllib.request.urlretrieve(MODEL_URL, zip_path)
        with zipfile.ZipFile(zip_path) as zf:
            with zf.open(_REC_ENTRY) as src, open(partial_model, "wb") as dst:
                # Stream in chunks instead of holding the whole model in memory.
                shutil.copyfileobj(src, dst)
        # Publish the model only after it is complete.
        partial_model.replace(MODEL_FILE)
    finally:
        # Clean up on success and on failure alike so a retry starts fresh.
        zip_path.unlink(missing_ok=True)
        partial_model.unlink(missing_ok=True)
    logger.info("Model ready at %s", MODEL_FILE)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _get_session() -> ort.InferenceSession:
    """Return the module-wide ONNX inference session, creating it on first use.

    The first call runs ``_ensure_model()`` and then builds a CPU-only
    session for ``MODEL_FILE``; later calls return the cached object.
    """
    global _session
    if _session is not None:
        return _session

    _ensure_model()
    _session = ort.InferenceSession(
        str(MODEL_FILE), providers=["CPUExecutionProvider"]
    )
    return _session
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _detect(frame_bgr: np.ndarray) -> list[tuple[int, int, int, int]]:
    """Detect frontal faces in a BGR frame with the Haar cascade.

    Returns a list of ``(x, y, w, h)`` boxes as plain Python ints, sorted
    by area with the largest box first, so that ``boxes[0]`` is the most
    prominent face rather than whichever one the detector reported first.
    The list is empty when nothing is detected.
    """
    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
    detections = _CASCADE.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60)
    )
    # Convert numpy scalars to int so the result matches the annotation;
    # an empty detection result simply produces an empty list.
    faces = [tuple(int(v) for v in box) for box in detections]
    faces.sort(key=lambda box: box[2] * box[3], reverse=True)
    return faces
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def _embed(face_bgr: np.ndarray) -> np.ndarray:
    """Compute an L2-normalised embedding for a cropped face image.

    The crop is resized to 112x112, shifted and scaled by 127.5, reordered
    to NCHW and run through the ONNX session. The first output row is
    returned, divided by its Euclidean norm. If that norm is zero the
    vector is returned unscaled instead of being turned into NaNs.
    """
    img = cv2.resize(face_bgr, (112, 112)).astype(np.float32)
    # NOTE(review): the crop is passed on in BGR channel order. InsightFace's
    # reference preprocessing appears to swap to RGB; confirm before changing,
    # because a swap would invalidate embeddings that are already enrolled.
    img = (img - 127.5) / 127.5
    inp = np.transpose(img, (2, 0, 1))[np.newaxis]  # HWC -> NCHW
    sess = _get_session()
    emb = sess.run(None, {sess.get_inputs()[0].name: inp})[0][0]
    norm = np.linalg.norm(emb)
    if norm == 0:
        # Avoid 0/0: an all-zero vector has similarity 0 with everything,
        # which stays below any positive match threshold.
        return emb
    return emb / norm  # L2-normalise
|
| 87 |
|
| 88 |
|
| 89 |
+
# ---------------------------------------------------------------------------
|
| 90 |
+
# Public API (called from main.py)
|
| 91 |
+
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
| 92 |
|
| 93 |
+
def load() -> dict[str, list[list[float]]]:
    """Load face DB from disk and warm up the ONNX session.

    Returns the parsed contents of ``DB_PATH``, or an empty dict when the
    file does not exist. A file that cannot be parsed as JSON, or whose
    top-level value is not an object, is reported with a warning and
    treated as an empty DB instead of aborting start-up. The file itself
    is left untouched until the next ``save()``.
    """
    _get_session()  # triggers one-time model download
    try:
        db = json.loads(DB_PATH.read_text(encoding="utf-8"))
    except FileNotFoundError:
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        logger.warning("Ignoring unreadable face DB %s: %s", DB_PATH, exc)
        return {}
    if not isinstance(db, dict):
        logger.warning("Ignoring face DB %s: top-level value is not an object", DB_PATH)
        return {}
    return db
|
| 99 |
|
| 100 |
+
|
| 101 |
+
def save(db: dict[str, list[list[float]]]) -> None:
    """Write ``db`` to ``DB_PATH`` as indented JSON.

    The data is serialised first, written to a temporary sibling file and
    then renamed over ``DB_PATH``. A crash or serialisation error part-way
    through therefore leaves the previous DB file intact rather than
    truncated.
    """
    payload = json.dumps(db, indent=2)
    tmp_path = DB_PATH.with_name(DB_PATH.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(DB_PATH)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
def find_match(
    frame_bgr: np.ndarray,
    db: dict[str, list[list[float]]],
    threshold: float = 0.35,
) -> Optional[str]:
    """Return matched name if recognised, None if face present but unknown.

    The largest detected face is embedded and compared, by dot product,
    against every stored encoding in ``db``. The name belonging to the
    highest similarity is returned when that similarity is at least
    ``threshold``. Stored encodings whose shape differs from the new
    embedding are skipped.

    Raises NoFaceDetected if no face appears in the image at all.
    """
    boxes = _detect(frame_bgr)
    if not boxes:
        raise NoFaceDetected()

    # Use the biggest box: with several people in view, the nearest face
    # is the one being addressed, not whichever the detector lists first.
    x, y, w, h = max(boxes, key=lambda box: box[2] * box[3])
    emb = _embed(frame_bgr[y : y + h, x : x + w])

    best_name, best_sim = None, -1.0
    for name, enc_list in db.items():
        for enc in enc_list:
            stored = np.asarray(enc)
            if stored.shape != emb.shape:
                # np.dot would raise on a length mismatch; ignore the entry.
                continue
            sim = float(np.dot(emb, stored))
            if sim > best_sim:
                best_sim, best_name = sim, name

    if best_name is not None and best_sim >= threshold:
        return best_name
    return None  # face present but not recognised (or DB is empty)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def add_face(
    name: str,
    frame_bgr: np.ndarray,
    db: dict[str, list[list[float]]],
    max_per_person: int = 5,
) -> None:
    """Embed and store the face from frame_bgr under name.

    The largest detected face is embedded and appended to ``db[name]``,
    and the DB is then saved. When ``name`` already holds
    ``max_per_person`` encodings, nothing is added and nothing is written.

    Raises ValueError if ``name`` is blank or no face is detected in
    ``frame_bgr``.
    """
    if not name or not name.strip():
        # A blank key would be stored but could never be reported as a match.
        raise ValueError("Enrollment name must not be empty")

    boxes = _detect(frame_bgr)
    if not boxes:
        raise ValueError("No face detected in enrollment image")

    # Enrol the most prominent face, not an arbitrary detection.
    x, y, w, h = max(boxes, key=lambda box: box[2] * box[3])
    emb = _embed(frame_bgr[y : y + h, x : x + w])

    if len(db.get(name, [])) >= max_per_person:
        logger.info(
            "Not adding encoding for %r: limit of %d reached", name, max_per_person
        )
        return

    db.setdefault(name, []).append(emb.tolist())
    save(db)
|
recognizer/main.py
CHANGED
|
@@ -66,7 +66,7 @@ class Recognizer(ReachyMiniApp):
|
|
| 66 |
return {"state": _shared["state"]}
|
| 67 |
|
| 68 |
# --- Initialise ---
|
| 69 |
-
|
| 70 |
state = State.SLEEPING
|
| 71 |
doa_angle = math.pi / 2 # default: facing front
|
| 72 |
speech_count = 0
|
|
@@ -112,7 +112,7 @@ class Recognizer(ReachyMiniApp):
|
|
| 112 |
active_start = time.time()
|
| 113 |
scan_t0 = active_start
|
| 114 |
last_face_check = 0.0
|
| 115 |
-
|
| 116 |
state = State.ACTIVE
|
| 117 |
|
| 118 |
# ---------- ACTIVE ----------
|
|
@@ -129,13 +129,13 @@ class Recognizer(ReachyMiniApp):
|
|
| 129 |
head=_look_direction(1.0, y_scan, 0.0)
|
| 130 |
)
|
| 131 |
|
| 132 |
-
# Throttled face recognition
|
| 133 |
if now - last_face_check >= FACE_INTERVAL:
|
| 134 |
last_face_check = now
|
| 135 |
frame = reachy_mini.media.get_frame()
|
| 136 |
if frame is not None:
|
| 137 |
try:
|
| 138 |
-
name = find_match(frame,
|
| 139 |
if name:
|
| 140 |
speak(f"Hi {name}!", reachy_mini)
|
| 141 |
reachy_mini.goto_sleep()
|
|
@@ -171,7 +171,7 @@ class Recognizer(ReachyMiniApp):
|
|
| 171 |
_shared["pending_name"] = None
|
| 172 |
if pending_frame is not None:
|
| 173 |
try:
|
| 174 |
-
add_face(name, pending_frame,
|
| 175 |
except ValueError as exc:
|
| 176 |
logger.warning("Enrollment failed: %s", exc)
|
| 177 |
speak(f"Nice to meet you, {name}!", reachy_mini)
|
|
|
|
| 66 |
return {"state": _shared["state"]}
|
| 67 |
|
| 68 |
# --- Initialise ---
|
| 69 |
+
face_db = load_face_db()
|
| 70 |
state = State.SLEEPING
|
| 71 |
doa_angle = math.pi / 2 # default: facing front
|
| 72 |
speech_count = 0
|
|
|
|
| 112 |
active_start = time.time()
|
| 113 |
scan_t0 = active_start
|
| 114 |
last_face_check = 0.0
|
| 115 |
+
pending_frame = None
|
| 116 |
state = State.ACTIVE
|
| 117 |
|
| 118 |
# ---------- ACTIVE ----------
|
|
|
|
| 129 |
head=_look_direction(1.0, y_scan, 0.0)
|
| 130 |
)
|
| 131 |
|
| 132 |
+
# Throttled face recognition
|
| 133 |
if now - last_face_check >= FACE_INTERVAL:
|
| 134 |
last_face_check = now
|
| 135 |
frame = reachy_mini.media.get_frame()
|
| 136 |
if frame is not None:
|
| 137 |
try:
|
| 138 |
+
name = find_match(frame, face_db)
|
| 139 |
if name:
|
| 140 |
speak(f"Hi {name}!", reachy_mini)
|
| 141 |
reachy_mini.goto_sleep()
|
|
|
|
| 171 |
_shared["pending_name"] = None
|
| 172 |
if pending_frame is not None:
|
| 173 |
try:
|
| 174 |
+
add_face(name, pending_frame, face_db)
|
| 175 |
except ValueError as exc:
|
| 176 |
logger.warning("Enrollment failed: %s", exc)
|
| 177 |
speak(f"Nice to meet you, {name}!", reachy_mini)
|