Oliver Nitsche Claude Sonnet 4.6 committed on
Commit
2fda523
·
1 Parent(s): 77c6ffa

Switch to local ONNX face recognition (no compilation, no cloud)

Browse files

Replaces AWS Rekognition with onnxruntime + OpenCV Haar cascade +
InsightFace MobileFaceNet (w600k_mbf.onnx, ~17 MB downloaded once on
first run). Both onnxruntime and opencv-python-headless ship pre-built
ARM64 wheels so pip install is fast with no compilation step. Face
embeddings are stored locally in face_db.json as before.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (5) hide show
  1. .gitignore +3 -1
  2. CLAUDE.md +7 -14
  3. pyproject.toml +2 -2
  4. recognizer/face_db.py +122 -58
  5. recognizer/main.py +5 -5
.gitignore CHANGED
@@ -1,3 +1,5 @@
1
  __pycache__/
2
  *.egg-info/
3
- build/
 
 
 
1
  __pycache__/
2
  *.egg-info/
3
+ build/
4
+ recognizer/models/
5
+ recognizer/face_db.json
CLAUDE.md CHANGED
@@ -22,21 +22,14 @@ pip install -e .
22
  sudo apt-get install espeak-ng # text-to-speech synthesis
23
  ```
24
 
25
- ### AWS Rekognition credentials
26
 
27
- Face recognition is handled by AWS Rekognition (no local compilation required).
28
- Set credentials on the robot before running:
 
 
29
 
30
- ```bash
31
- export AWS_ACCESS_KEY_ID=...
32
- export AWS_SECRET_ACCESS_KEY=...
33
- export AWS_DEFAULT_REGION=us-east-1 # or your preferred region
34
- ```
35
-
36
- Or use `aws configure` if the AWS CLI is installed. The app auto-creates a
37
- Rekognition collection named `reachy-mini-recognizer` on first run.
38
- The IAM user/role needs: `rekognition:CreateCollection`,
39
- `rekognition:IndexFaces`, `rekognition:SearchFacesByImage`.
40
 
41
  ## Running the App
42
 
@@ -87,7 +80,7 @@ SLEEPING →(speech detected × 3)→ WAKING → ACTIVE → SLEEPING
87
  - **ENROLLING**: robot has detected an unrecognised face; waits for name to be submitted via the web UI (`POST /set_name`). Stores encoding in `face_db.json`, says "Nice to meet you, <name>!", then sleeps.
88
 
89
  **Helper modules**:
90
- - `recognizer/face_db.py` — AWS Rekognition wrapper. `load()` creates/opens the collection and returns its ID. `find_match(frame_bgr, collection_id)` returns the name or None (raises `NoFaceDetected` if no face present). `add_face(name, frame_bgr, collection_id)` enrolls a face. Similarity threshold = 85 (0–100 scale).
91
  - `recognizer/tts.py` — synthesises text via `espeak-ng -s 140 -w <tmp.wav>`, plays via `media.play_sound()`, then sleeps to let playback finish.
92
 
93
  **Settings UI** (`recognizer/static/`):
 
22
  sudo apt-get install espeak-ng # text-to-speech synthesis
23
  ```
24
 
25
+ ### Face recognition model (one-time download)
26
 
27
+ Face recognition runs **locally** using ONNX Runtime (no cloud account needed).
28
+ On first run the app downloads the InsightFace MobileFaceNet model (~17 MB)
29
+ from GitHub and caches it at `recognizer/models/w600k_mbf.onnx`.
30
+ Requires internet access the first time only; fully offline thereafter.
31
 
32
+ Requires **64-bit Raspberry Pi OS** (onnxruntime ships pre-built aarch64 wheels).
 
 
 
 
 
 
 
 
 
33
 
34
  ## Running the App
35
 
 
80
  - **ENROLLING**: robot has detected an unrecognised face; waits for name to be submitted via the web UI (`POST /set_name`). Stores encoding in `face_db.json`, says "Nice to meet you, <name>!", then sleeps.
81
 
82
  **Helper modules**:
83
+ - `recognizer/face_db.py` — local face recognition via ONNX Runtime. `load()` warms up the ONNX session (downloads model on first run) and returns the embedding DB dict. `find_match(frame_bgr, db)` detects with OpenCV Haar cascade, embeds with MobileFaceNet, matches by cosine similarity (threshold 0.35); raises `NoFaceDetected` if no face. `add_face(name, frame_bgr, db)` enrolls a face. DB stored in `recognizer/face_db.json`.
84
  - `recognizer/tts.py` — synthesises text via `espeak-ng -s 140 -w <tmp.wav>`, plays via `media.play_sound()`, then sleeps to let playback finish.
85
 
86
  **Settings UI** (`recognizer/static/`):
pyproject.toml CHANGED
@@ -11,8 +11,8 @@ readme = "README.md"
11
  requires-python = ">=3.10"
12
  dependencies = [
13
  "reachy-mini",
14
- "boto3",
15
- "Pillow",
16
  "scipy",
17
  ]
18
  keywords = ["reachy-mini-app", "reachy-mini"]
 
11
  requires-python = ">=3.10"
12
  dependencies = [
13
  "reachy-mini",
14
+ "onnxruntime",
15
+ "opencv-python-headless",
16
  "scipy",
17
  ]
18
  keywords = ["reachy-mini-app", "reachy-mini"]
recognizer/face_db.py CHANGED
@@ -1,86 +1,150 @@
1
- """Face database: backed by AWS Rekognition.
2
 
3
- Requires boto3 and AWS credentials configured (e.g. via environment variables
4
- AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION, or an IAM role).
 
 
 
 
 
5
  """
6
 
7
- import io
8
  import logging
 
 
 
9
  from typing import Optional
10
 
11
- import boto3
12
- from botocore.exceptions import BotoCoreError, ClientError
13
- from PIL import Image
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
- COLLECTION_ID = "reachy-mini-recognizer"
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  class NoFaceDetected(Exception):
21
  """Raised when no face is found in the provided image."""
22
 
23
 
24
- def _client():
25
- return boto3.client("rekognition")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
- def _to_jpeg(frame_bgr) -> bytes:
29
- rgb = frame_bgr[:, :, ::-1]
30
- buf = io.BytesIO()
31
- Image.fromarray(rgb).save(buf, format="JPEG")
32
- return buf.getvalue()
33
 
 
 
 
 
 
 
34
 
35
- def load() -> str:
36
- """Ensure the Rekognition collection exists; return its ID."""
37
- client = _client()
38
- try:
39
- client.create_collection(CollectionId=COLLECTION_ID)
40
- logger.info("Created Rekognition collection '%s'", COLLECTION_ID)
41
- except client.exceptions.ResourceAlreadyExistsException:
42
- pass
43
- return COLLECTION_ID
44
 
45
 
46
  def find_match(
47
- frame_bgr,
48
- collection_id: str,
49
- threshold: float = 85.0,
50
  ) -> Optional[str]:
51
- """Search for a face in frame_bgr against the collection.
52
 
53
- Returns the matched name if recognised, None if a face is present but
54
- unknown. Raises NoFaceDetected if no face appears in the image at all.
55
  """
56
- client = _client()
57
- try:
58
- resp = client.search_faces_by_image(
59
- CollectionId=collection_id,
60
- Image={"Bytes": _to_jpeg(frame_bgr)},
61
- FaceMatchThreshold=threshold,
62
- MaxFaces=1,
63
- )
64
- matches = resp.get("FaceMatches", [])
65
- if matches:
66
- return matches[0]["Face"]["ExternalImageId"]
67
- return None # face detected but not in collection
68
- except client.exceptions.InvalidParameterException:
69
  raise NoFaceDetected()
70
- except (BotoCoreError, ClientError) as exc:
71
- logger.warning("Rekognition error: %s", exc)
72
- raise NoFaceDetected()
73
-
74
 
75
- def add_face(name: str, frame_bgr, collection_id: str) -> None:
76
- """Index the face in frame_bgr under name in the collection."""
77
- client = _client()
78
- resp = client.index_faces(
79
- CollectionId=collection_id,
80
- Image={"Bytes": _to_jpeg(frame_bgr)},
81
- ExternalImageId=name,
82
- MaxFaces=1,
83
- DetectionAttributes=[],
84
- )
85
- if not resp.get("FaceRecords"):
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  raise ValueError("No face detected in enrollment image")
 
 
 
 
 
 
 
 
 
1
+ """Face database: local face recognition via ONNX + OpenCV.
2
 
3
+ Detection : OpenCV Haar cascade (built into opencv, no download).
4
+ Embedding : InsightFace MobileFaceNet (w600k_mbf.onnx, ~17 MB, downloaded
5
+ once on first run from the InsightFace GitHub release).
6
+ Matching : cosine similarity on L2-normalised 512-D embeddings.
7
+ Storage : recognizer/face_db.json (gitignored).
8
+
9
+ No compilation required — onnxruntime ships pre-built ARM64 wheels.
10
  """
11
 
12
+ import json
13
  import logging
14
+ import urllib.request
15
+ import zipfile
16
+ from pathlib import Path
17
  from typing import Optional
18
 
19
+ import cv2
20
+ import numpy as np
21
+ import onnxruntime as ort
22
 
23
  logger = logging.getLogger(__name__)
24
 
25
# All on-disk state (embedding DB and cached model) lives beside this module.
_PACKAGE_DIR = Path(__file__).parent

DB_PATH = _PACKAGE_DIR / "face_db.json"
MODEL_DIR = _PACKAGE_DIR / "models"
MODEL_FILE = MODEL_DIR / "w600k_mbf.onnx"

# Release archive that contains the recognition model, and the member to extract.
MODEL_URL = "https://github.com/deepinsight/insightface/releases/download/v0.7/buffalo_sc.zip"
_REC_ENTRY = "buffalo_sc/w600k_mbf.onnx"  # path inside the zip

# Frontal-face Haar cascade shipped with the OpenCV package.
_CASCADE_XML = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
_CASCADE = cv2.CascadeClassifier(_CASCADE_XML)

# ONNX Runtime session; stays None until _get_session() builds it.
_session: Optional[ort.InferenceSession] = None
38
 
39
 
40
class NoFaceDetected(Exception):
    """Signals that the supplied image contains no detectable face."""
42
 
43
 
44
+ # ---------------------------------------------------------------------------
45
+ # Internal helpers
46
+ # ---------------------------------------------------------------------------
47
+
48
def _ensure_model() -> None:
    """Make sure the recognition model is cached at MODEL_FILE.

    Does nothing if MODEL_FILE already exists. Otherwise downloads the
    archive at MODEL_URL into MODEL_DIR and extracts the _REC_ENTRY member.

    The member is extracted to a temporary ".part" file and renamed into
    place only once fully written, so an interrupted run can never leave a
    truncated MODEL_FILE that later runs would mistake for a valid model.
    The downloaded zip and any leftover partial file are removed whether or
    not the download/extraction succeeds.

    Raises:
        Whatever urllib / zipfile / the filesystem raise on failure (e.g.
        urllib.error.URLError, zipfile.BadZipFile, KeyError for a missing
        archive member, OSError).
    """
    if MODEL_FILE.exists():
        return

    MODEL_DIR.mkdir(parents=True, exist_ok=True)
    zip_path = MODEL_DIR / "buffalo_sc.zip"
    partial = MODEL_FILE.with_name(MODEL_FILE.name + ".part")

    logger.info("Downloading face recognition model (~17 MB) — one-time setup...")
    try:
        urllib.request.urlretrieve(MODEL_URL, zip_path)
        with zipfile.ZipFile(zip_path) as zf:
            with zf.open(_REC_ENTRY) as src, open(partial, "wb") as dst:
                # Stream in 1 MiB chunks instead of holding the whole model in RAM.
                while chunk := src.read(1 << 20):
                    dst.write(chunk)
        # Rename within the same directory: the model appears all-or-nothing.
        partial.replace(MODEL_FILE)
    finally:
        zip_path.unlink(missing_ok=True)
        partial.unlink(missing_ok=True)
    logger.info("Model ready at %s", MODEL_FILE)
60
+
61
+
62
def _get_session() -> ort.InferenceSession:
    """Return the module-wide ONNX Runtime session, creating it on first call.

    The first call runs _ensure_model() and then builds a CPU-only
    InferenceSession for MODEL_FILE; later calls return the cached object.
    """
    global _session
    if _session is not None:
        return _session

    _ensure_model()
    model_path = str(MODEL_FILE)
    _session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
    return _session
70
+
71
+
72
def _detect(frame_bgr: np.ndarray) -> list[tuple[int, int, int, int]]:
    """Detect frontal faces in a BGR frame with the Haar cascade.

    Args:
        frame_bgr: image in BGR channel order (it is converted with
            cv2.COLOR_BGR2GRAY before detection).

    Returns:
        A list of (x, y, w, h) boxes as plain Python ints, ordered by area
        with the largest face first; an empty list when nothing is found.
        Callers that only use the first box therefore get the most
        prominent face rather than an arbitrary detection.
    """
    gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
    raw_boxes = _CASCADE.detectMultiScale(
        gray, scaleFactor=1.1, minNeighbors=4, minSize=(60, 60)
    )
    # detectMultiScale yields an empty sequence when there are no hits, so
    # plain iteration covers both the "no faces" and the ndarray case.
    boxes = [(int(x), int(y), int(w), int(h)) for x, y, w, h in raw_boxes]
    boxes.sort(key=lambda box: box[2] * box[3], reverse=True)
    return boxes
78
+
79
+
80
def _embed(face_bgr: np.ndarray) -> np.ndarray:
    """Compute an L2-normalised embedding for a cropped face.

    Args:
        face_bgr: face crop in BGR channel order (as cut from a camera frame).

    Returns:
        The first output of the recognition model for this crop, scaled to
        unit length. If the model returns an all-zero vector it is returned
        unchanged instead of dividing by zero and producing NaNs.
    """
    # InsightFace's reference preprocessing swaps OpenCV's BGR to RGB before
    # inference; feeding BGR directly degrades the embeddings.
    # NOTE(review): embeddings enrolled before this change were computed from
    # BGR input and should be re-enrolled.
    rgb = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)
    img = cv2.resize(rgb, (112, 112)).astype(np.float32)
    img = (img - 127.5) / 127.5  # map 0..255 to -1..1
    # HWC -> NCHW with a batch dimension of 1, in contiguous memory.
    inp = np.ascontiguousarray(np.transpose(img, (2, 0, 1))[np.newaxis])
    sess = _get_session()
    emb = sess.run(None, {sess.get_inputs()[0].name: inp})[0][0]
    norm = float(np.linalg.norm(emb))
    if norm == 0.0:
        return emb
    return emb / norm  # L2-normalise
87
 
88
 
89
+ # ---------------------------------------------------------------------------
90
+ # Public API (called from main.py)
91
+ # ---------------------------------------------------------------------------
 
 
92
 
93
def load() -> dict[str, list[list[float]]]:
    """Warm up the ONNX session and read the face DB from DB_PATH.

    Returns the parsed JSON content of DB_PATH, or an empty dict when the
    file does not exist. Creating the session first triggers the one-time
    model download.
    """
    _get_session()  # triggers one-time model download
    try:
        raw = DB_PATH.read_text()
    except FileNotFoundError:
        return {}
    return json.loads(raw)
99
 
100
+
101
def save(db: dict[str, list[list[float]]]) -> None:
    """Write the face DB to DB_PATH as indented JSON.

    The data is written to a sibling ".tmp" file first and then renamed over
    DB_PATH, so a crash or power loss mid-write leaves the previous DB intact
    instead of a truncated file.
    """
    tmp_path = DB_PATH.with_name(DB_PATH.name + ".tmp")
    tmp_path.write_text(json.dumps(db, indent=2))
    tmp_path.replace(DB_PATH)
 
 
 
 
 
 
103
 
104
 
105
def find_match(
    frame_bgr: np.ndarray,
    db: dict[str, list[list[float]]],
    threshold: float = 0.35,
) -> Optional[str]:
    """Identify the most prominent face in a frame against the DB.

    Args:
        frame_bgr: camera frame in BGR channel order.
        db: mapping of name -> list of stored embeddings.
        threshold: minimum dot-product similarity for a match. Embeddings
            from _embed are unit length, so this is a cosine similarity
            provided the stored vectors are unit length too.

    Returns:
        The name whose stored embedding is most similar to the face, if that
        similarity reaches ``threshold``; otherwise None (face present but
        not recognised, or the DB is empty).

    Raises:
        NoFaceDetected: if no face appears in the image at all.
    """
    boxes = _detect(frame_bgr)
    if not boxes:
        raise NoFaceDetected()

    # With several detections, match the largest (closest) face rather than
    # whichever one the detector happened to list first.
    x, y, w, h = max(boxes, key=lambda box: box[2] * box[3])
    emb = _embed(frame_bgr[y : y + h, x : x + w])

    best_name, best_sim = None, -1.0
    for name, enc_list in db.items():
        if not enc_list:
            continue
        # One matrix-vector product scores all of this person's samples.
        sims = np.asarray(enc_list, dtype=np.float32) @ emb
        top = float(sims.max())
        if top > best_sim:
            best_sim, best_name = top, name

    if best_name is not None and best_sim >= threshold:
        return best_name
    return None  # face present but not recognised (or DB is empty)
131
+
132
+
133
def add_face(
    name: str,
    frame_bgr: np.ndarray,
    db: dict[str, list[list[float]]],
    max_per_person: int = 5,
) -> None:
    """Embed the most prominent face in a frame and store it under ``name``.

    Args:
        name: label to enroll the face under.
        frame_bgr: camera frame in BGR channel order.
        db: mapping of name -> list of stored embeddings; mutated in place.
        max_per_person: cap on stored embeddings per name. Once reached, the
            new sample is not stored (this is logged).

    The DB is written to disk via save() after every call that finds a face.

    Raises:
        ValueError: if no face is detected in the enrollment image.
    """
    boxes = _detect(frame_bgr)
    if not boxes:
        raise ValueError("No face detected in enrollment image")

    # Enroll the largest (closest) face, not an arbitrary first detection.
    x, y, w, h = max(boxes, key=lambda box: box[2] * box[3])
    emb = _embed(frame_bgr[y : y + h, x : x + w])

    samples = db.setdefault(name, [])
    if len(samples) < max_per_person:
        samples.append(emb.tolist())
    else:
        logger.info(
            "Already %d embeddings stored for %s; new sample not added",
            len(samples),
            name,
        )
    save(db)
recognizer/main.py CHANGED
@@ -66,7 +66,7 @@ class Recognizer(ReachyMiniApp):
66
  return {"state": _shared["state"]}
67
 
68
  # --- Initialise ---
69
- collection_id = load_face_db()
70
  state = State.SLEEPING
71
  doa_angle = math.pi / 2 # default: facing front
72
  speech_count = 0
@@ -112,7 +112,7 @@ class Recognizer(ReachyMiniApp):
112
  active_start = time.time()
113
  scan_t0 = active_start
114
  last_face_check = 0.0
115
- pending_enc = None
116
  state = State.ACTIVE
117
 
118
  # ---------- ACTIVE ----------
@@ -129,13 +129,13 @@ class Recognizer(ReachyMiniApp):
129
  head=_look_direction(1.0, y_scan, 0.0)
130
  )
131
 
132
- # Throttled face recognition via AWS Rekognition
133
  if now - last_face_check >= FACE_INTERVAL:
134
  last_face_check = now
135
  frame = reachy_mini.media.get_frame()
136
  if frame is not None:
137
  try:
138
- name = find_match(frame, collection_id)
139
  if name:
140
  speak(f"Hi {name}!", reachy_mini)
141
  reachy_mini.goto_sleep()
@@ -171,7 +171,7 @@ class Recognizer(ReachyMiniApp):
171
  _shared["pending_name"] = None
172
  if pending_frame is not None:
173
  try:
174
- add_face(name, pending_frame, collection_id)
175
  except ValueError as exc:
176
  logger.warning("Enrollment failed: %s", exc)
177
  speak(f"Nice to meet you, {name}!", reachy_mini)
 
66
  return {"state": _shared["state"]}
67
 
68
  # --- Initialise ---
69
+ face_db = load_face_db()
70
  state = State.SLEEPING
71
  doa_angle = math.pi / 2 # default: facing front
72
  speech_count = 0
 
112
  active_start = time.time()
113
  scan_t0 = active_start
114
  last_face_check = 0.0
115
+ pending_frame = None
116
  state = State.ACTIVE
117
 
118
  # ---------- ACTIVE ----------
 
129
  head=_look_direction(1.0, y_scan, 0.0)
130
  )
131
 
132
+ # Throttled face recognition
133
  if now - last_face_check >= FACE_INTERVAL:
134
  last_face_check = now
135
  frame = reachy_mini.media.get_frame()
136
  if frame is not None:
137
  try:
138
+ name = find_match(frame, face_db)
139
  if name:
140
  speak(f"Hi {name}!", reachy_mini)
141
  reachy_mini.goto_sleep()
 
171
  _shared["pending_name"] = None
172
  if pending_frame is not None:
173
  try:
174
+ add_face(name, pending_frame, face_db)
175
  except ValueError as exc:
176
  logger.warning("Enrollment failed: %s", exc)
177
  speak(f"Nice to meet you, {name}!", reachy_mini)