Spaces:

onitsche
/

recognizer

Running

Oliver Nitsche Claude Sonnet 4.6 committed 27 days ago

Commit

77c6ffa

1 Parent(s): 715f41e

Replace face-recognition/dlib with AWS Rekognition

Eliminates the ~15-minute dlib compilation on Raspberry Pi. boto3 and
Pillow (pure Python, pre-built wheels) replace the face-recognition
package. Face embeddings are now stored in an AWS Rekognition collection
instead of a local face_db.json file; existing enrolled faces will need
to be re-registered.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Files changed (4) hide show

CLAUDE.md +17 -2
pyproject.toml +2 -1
recognizer/face_db.py +72 -38
recognizer/main.py +27 -38

CLAUDE.md CHANGED Viewed

@@ -20,9 +20,24 @@ pip install -e .
 ```bash
 sudo apt-get install espeak-ng        # text-to-speech synthesis
-pip install face-recognition          # compiles dlib from source (~15 min on Pi)
 ```
 ## Running the App
 Run directly (connects to a live Reachy Mini robot):
@@ -72,7 +87,7 @@ SLEEPING  →(speech detected × 3)→  WAKING  →  ACTIVE  →  SLEEPING
 - **ENROLLING**: robot has detected an unrecognised face; waits for name to be submitted via the web UI (`POST /set_name`). Stores encoding in `face_db.json`, says "Nice to meet you, <name>!", then sleeps.
 **Helper modules**:
-- `recognizer/face_db.py` — load/save/query face encodings. Database at `recognizer/face_db.json` (gitignored). `find_match()` tolerance = 0.55.
 - `recognizer/tts.py` — synthesises text via `espeak-ng -s 140 -w <tmp.wav>`, plays via `media.play_sound()`, then sleeps to let playback finish.
 **Settings UI** (`recognizer/static/`):

 ```bash
 sudo apt-get install espeak-ng        # text-to-speech synthesis
 ```
+### AWS Rekognition credentials
+Face recognition is handled by AWS Rekognition (no local compilation required).
+Set credentials on the robot before running:
+```bash
+export AWS_ACCESS_KEY_ID=...
+export AWS_SECRET_ACCESS_KEY=...
+export AWS_DEFAULT_REGION=us-east-1   # or your preferred region
+```
+Or use `aws configure` if the AWS CLI is installed. The app auto-creates a
+Rekognition collection named `reachy-mini-recognizer` on first run.
+The IAM user/role needs: `rekognition:CreateCollection`,
+`rekognition:IndexFaces`, `rekognition:SearchFacesByImage`.
 ## Running the App
 Run directly (connects to a live Reachy Mini robot):
 - **ENROLLING**: robot has detected an unrecognised face; waits for name to be submitted via the web UI (`POST /set_name`). Stores encoding in `face_db.json`, says "Nice to meet you, <name>!", then sleeps.
 **Helper modules**:
+- `recognizer/face_db.py` — AWS Rekognition wrapper. `load()` creates/opens the collection and returns its ID. `find_match(frame_bgr, collection_id)` returns the name or None (raises `NoFaceDetected` if no face present). `add_face(name, frame_bgr, collection_id)` enrolls a face. Similarity threshold = 85 (0–100 scale).
 - `recognizer/tts.py` — synthesises text via `espeak-ng -s 140 -w <tmp.wav>`, plays via `media.play_sound()`, then sleeps to let playback finish.
 **Settings UI** (`recognizer/static/`):

pyproject.toml CHANGED Viewed

@@ -11,7 +11,8 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "reachy-mini",
-    "face-recognition",
     "scipy",
 ]
 keywords = ["reachy-mini-app", "reachy-mini"]

 requires-python = ">=3.10"
 dependencies = [
     "reachy-mini",
+    "boto3",
+    "Pillow",
     "scipy",
 ]
 keywords = ["reachy-mini-app", "reachy-mini"]

recognizer/face_db.py CHANGED Viewed

@@ -1,52 +1,86 @@
-"""Face database: persist face encodings keyed by name."""
-import json
-from pathlib import Path
 from typing import Optional
-import numpy as np
-try:
-    import face_recognition
-except ImportError as exc:
-    raise ImportError(
-        "face-recognition is required: pip install face-recognition"
-    ) from exc
-DB_PATH = Path(__file__).parent / "face_db.json"
-def load() -> dict[str, list[list[float]]]:
-    if DB_PATH.exists():
-        return json.loads(DB_PATH.read_text())
-    return {}
-def save(db: dict[str, list[list[float]]]) -> None:
-    DB_PATH.write_text(json.dumps(db, indent=2))
 def find_match(
-    encoding: np.ndarray,
-    db: dict[str, list[list[float]]],
-    tolerance: float = 0.55,
 ) -> Optional[str]:
-    for name, enc_list in db.items():
-        known = [np.array(e) for e in enc_list]
-        if any(face_recognition.compare_faces(known, encoding, tolerance=tolerance)):
-            return name
-    return None
-def add_face(
-    name: str,
-    encoding: np.ndarray,
-    db: dict[str, list[list[float]]],
-    max_per_person: int = 5,
-) -> None:
-    if name not in db:
-        db[name] = []
-    if len(db[name]) < max_per_person:
-        db[name].append(encoding.tolist())
-    save(db)

+"""Face database: backed by AWS Rekognition.
+Requires boto3 and AWS credentials configured (e.g. via environment variables
+AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_DEFAULT_REGION, or an IAM role).
+"""
+import io
+import logging
 from typing import Optional
+import boto3
+from botocore.exceptions import BotoCoreError, ClientError
+from PIL import Image
+logger = logging.getLogger(__name__)
+COLLECTION_ID = "reachy-mini-recognizer"
+class NoFaceDetected(Exception):
+    """Raised when no face is found in the provided image."""
+def _client():
+    return boto3.client("rekognition")
+def _to_jpeg(frame_bgr) -> bytes:
+    rgb = frame_bgr[:, :, ::-1]
+    buf = io.BytesIO()
+    Image.fromarray(rgb).save(buf, format="JPEG")
+    return buf.getvalue()
+def load() -> str:
+    """Ensure the Rekognition collection exists; return its ID."""
+    client = _client()
+    try:
+        client.create_collection(CollectionId=COLLECTION_ID)
+        logger.info("Created Rekognition collection '%s'", COLLECTION_ID)
+    except client.exceptions.ResourceAlreadyExistsException:
+        pass
+    return COLLECTION_ID
 def find_match(
+    frame_bgr,
+    collection_id: str,
+    threshold: float = 85.0,
 ) -> Optional[str]:
+    """Search for a face in frame_bgr against the collection.
+    Returns the matched name if recognised, None if a face is present but
+    unknown.  Raises NoFaceDetected if no face appears in the image at all.
+    """
+    client = _client()
+    try:
+        resp = client.search_faces_by_image(
+            CollectionId=collection_id,
+            Image={"Bytes": _to_jpeg(frame_bgr)},
+            FaceMatchThreshold=threshold,
+            MaxFaces=1,
+        )
+        matches = resp.get("FaceMatches", [])
+        if matches:
+            return matches[0]["Face"]["ExternalImageId"]
+        return None  # face detected but not in collection
+    except client.exceptions.InvalidParameterException:
+        raise NoFaceDetected()
+    except (BotoCoreError, ClientError) as exc:
+        logger.warning("Rekognition error: %s", exc)
+        raise NoFaceDetected()
+def add_face(name: str, frame_bgr, collection_id: str) -> None:
+    """Index the face in frame_bgr under name in the collection."""
+    client = _client()
+    resp = client.index_faces(
+        CollectionId=collection_id,
+        Image={"Bytes": _to_jpeg(frame_bgr)},
+        ExternalImageId=name,
+        MaxFaces=1,
+        DetectionAttributes=[],
+    )
+    if not resp.get("FaceRecords"):
+        raise ValueError("No face detected in enrollment image")

recognizer/main.py CHANGED Viewed

@@ -18,17 +18,10 @@ import numpy as np
 from pydantic import BaseModel
 from reachy_mini import ReachyMini, ReachyMiniApp
-from recognizer.face_db import add_face, find_match
 from recognizer.face_db import load as load_face_db
 from recognizer.tts import speak
-try:
-    import face_recognition  # noqa: F401 – checked at import time
-except ImportError as exc:
-    raise ImportError(
-        "face-recognition is required: pip install face-recognition"
-    ) from exc
 logger = logging.getLogger(__name__)
 ACTIVE_TIMEOUT = 15.0     # seconds before returning to sleep with no face
@@ -49,8 +42,6 @@ class Recognizer(ReachyMiniApp):
     request_media_backend: str | None = None
     def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
-        import face_recognition as fr
         # --- Shared mutable state (main loop ↔ FastAPI handlers) ---
         _lock = threading.Lock()
         _shared: dict = {
@@ -75,13 +66,13 @@ class Recognizer(ReachyMiniApp):
                 return {"state": _shared["state"]}
         # --- Initialise ---
-        face_db = load_face_db()
         state = State.SLEEPING
         doa_angle = math.pi / 2   # default: facing front
         speech_count = 0
         active_start = 0.0
         last_face_check = 0.0
-        pending_enc: Optional[np.ndarray] = None
         scan_t0 = 0.0             # reference time for head-scan idle animation
         reachy_mini.goto_sleep()
@@ -138,34 +129,29 @@ class Recognizer(ReachyMiniApp):
                     head=_look_direction(1.0, y_scan, 0.0)
                 )
-                # Throttled face recognition
                 if now - last_face_check >= FACE_INTERVAL:
                     last_face_check = now
                     frame = reachy_mini.media.get_frame()
                     if frame is not None:
-                        rgb = frame[::2, ::2, ::-1]   # 2× downsample + BGR→RGB
-                        locs = fr.face_locations(rgb, model="hog")
-                        if locs:
-                            # Scale locations back to full-res for accurate encoding
-                            full_locs = [(t*2, r*2, b*2, l*2) for t, r, b, l in locs]
-                            encs = fr.face_encodings(frame[:, :, ::-1], full_locs)
-                            if encs:
-                                enc = encs[0]
-                                name = find_match(enc, face_db)
-                                if name:
-                                    speak(f"Hi {name}!", reachy_mini)
-                                    reachy_mini.goto_sleep()
-                                    state = State.SLEEPING
-                                else:
-                                    speak(
-                                        "I don't know you yet. "
-                                        "Please enter your name on the control panel.",
-                                        reachy_mini,
-                                    )
-                                    pending_enc = enc
-                                    with _lock:
-                                        _shared["pending_name"] = None
-                                    state = State.ENROLLING
                 # Timeout: nobody showed up
                 if state == State.ACTIVE and time.time() - active_start > ACTIVE_TIMEOUT:
@@ -183,8 +169,11 @@ class Recognizer(ReachyMiniApp):
                 if name:
                     with _lock:
                         _shared["pending_name"] = None
-                    if pending_enc is not None:
-                        add_face(name, pending_enc, face_db)
                     speak(f"Nice to meet you, {name}!", reachy_mini)
                     reachy_mini.goto_sleep()
                     state = State.SLEEPING

 from pydantic import BaseModel
 from reachy_mini import ReachyMini, ReachyMiniApp
+from recognizer.face_db import NoFaceDetected, add_face, find_match
 from recognizer.face_db import load as load_face_db
 from recognizer.tts import speak
 logger = logging.getLogger(__name__)
 ACTIVE_TIMEOUT = 15.0     # seconds before returning to sleep with no face
     request_media_backend: str | None = None
     def run(self, reachy_mini: ReachyMini, stop_event: threading.Event) -> None:
         # --- Shared mutable state (main loop ↔ FastAPI handlers) ---
         _lock = threading.Lock()
         _shared: dict = {
                 return {"state": _shared["state"]}
         # --- Initialise ---
+        collection_id = load_face_db()
         state = State.SLEEPING
         doa_angle = math.pi / 2   # default: facing front
         speech_count = 0
         active_start = 0.0
         last_face_check = 0.0
+        pending_frame: Optional[np.ndarray] = None
         scan_t0 = 0.0             # reference time for head-scan idle animation
         reachy_mini.goto_sleep()
                     head=_look_direction(1.0, y_scan, 0.0)
                 )
+                # Throttled face recognition via AWS Rekognition
                 if now - last_face_check >= FACE_INTERVAL:
                     last_face_check = now
                     frame = reachy_mini.media.get_frame()
                     if frame is not None:
+                        try:
+                            name = find_match(frame, collection_id)
+                            if name:
+                                speak(f"Hi {name}!", reachy_mini)
+                                reachy_mini.goto_sleep()
+                                state = State.SLEEPING
+                            else:
+                                speak(
+                                    "I don't know you yet. "
+                                    "Please enter your name on the control panel.",
+                                    reachy_mini,
+                                )
+                                pending_frame = frame
+                                with _lock:
+                                    _shared["pending_name"] = None
+                                state = State.ENROLLING
+                        except NoFaceDetected:
+                            pass  # no face in frame yet, keep scanning
                 # Timeout: nobody showed up
                 if state == State.ACTIVE and time.time() - active_start > ACTIVE_TIMEOUT:
                 if name:
                     with _lock:
                         _shared["pending_name"] = None
+                    if pending_frame is not None:
+                        try:
+                            add_face(name, pending_frame, collection_id)
+                        except ValueError as exc:
+                            logger.warning("Enrollment failed: %s", exc)
                     speak(f"Nice to meet you, {name}!", reachy_mini)
                     reachy_mini.goto_sleep()
                     state = State.SLEEPING