AdarshDRC committed on
Commit
725ae84
·
1 Parent(s): 90a3f26

Feat: enhancing people

Browse files
Files changed (2) hide show
  1. src/core/config.py +4 -3
  2. src/services/clustering.py +12 -2
src/core/config.py CHANGED
@@ -64,9 +64,10 @@ MAX_FACES_PER_IMAGE = int(os.getenv("MAX_FACES_PER_IMAGE", "20"))
64
  FACE_QUALITY_GATE = float(os.getenv("FACE_QUALITY_GATE", "0.3"))
65
 
66
  # Laplacian variance blur threshold for face crops.
67
- # Faces below this score are excluded from search results.
68
  # Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
69
  FACE_BLUR_THRESHOLD = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))
 
70
 
71
  # ──────────────────────────────────────────────────────────────
72
  # Embedding dimensions
@@ -155,8 +156,8 @@ USE_ASYNC_UPLOADS = int(os.getenv("USE_ASYNC_UPLOADS", "1"))
155
  USE_CLUSTER_AWARE_SEARCH = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))
156
 
157
  # HDBSCAN parameters — tuned for typical 1k–10k image libraries
158
- CLUSTER_MIN_SAMPLES = int(os.getenv("CLUSTER_MIN_SAMPLES", "3"))
159
- CLUSTER_MIN_CLUSTER_SIZE = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "3"))
160
  CLUSTER_EPSILON = float(os.getenv("CLUSTER_EPSILON", "0.35"))
161
 
162
  # Auto re-cluster after every N new face uploads (0 = disabled, manual only)
 
64
  FACE_QUALITY_GATE = float(os.getenv("FACE_QUALITY_GATE", "0.3"))
65
 
66
  # Laplacian variance blur threshold for face crops.
67
+ # Faces below this score are excluded from search results; clustering applies its own, more lenient CLUSTERING_BLUR_THRESHOLD (below).
68
  # Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
69
  FACE_BLUR_THRESHOLD = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))
70
+ CLUSTERING_BLUR_THRESHOLD = float(os.getenv("CLUSTERING_BLUR_THRESHOLD", "30.0")) # Slightly more lenient for clustering
71
 
72
  # ──────────────────────────────────────────────────────────────
73
  # Embedding dimensions
 
156
  USE_CLUSTER_AWARE_SEARCH = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))
157
 
158
  # HDBSCAN parameters — tuned for typical 1k–10k image libraries
159
+ CLUSTER_MIN_SAMPLES = int(os.getenv("CLUSTER_MIN_SAMPLES", "2")) # Lowered from 3 to include pairs
160
+ CLUSTER_MIN_CLUSTER_SIZE = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "2")) # Lowered from 3 to 2
161
  CLUSTER_EPSILON = float(os.getenv("CLUSTER_EPSILON", "0.35"))
162
 
163
  # Auto re-cluster after every N new face uploads (0 = disabled, manual only)
src/services/clustering.py CHANGED
@@ -35,7 +35,7 @@ from src.core.config import (
35
  IDX_FACES_ARCFACE,
36
  SUPABASE_URL, SUPABASE_SERVICE_KEY,
37
  CLUSTER_MIN_SAMPLES, CLUSTER_MIN_CLUSTER_SIZE, CLUSTER_EPSILON,
38
- FACE_SEARCH_TOP_K,
39
  )
40
 
41
 
@@ -193,7 +193,17 @@ async def run_clustering(pc, user_id: str) -> dict:
193
 
194
  ids = [r["id"] for r in raw]
195
  metas = [r["metadata"] for r in raw]
196
- matrix = np.array([r["values"] for r in raw], dtype=np.float32)
 
 
 
 
 
 
 
 
 
 
197
 
198
  # L2-normalise before euclidean HDBSCAN (equivalent to angular distance)
199
  norms = np.linalg.norm(matrix, axis=1, keepdims=True)
 
35
  IDX_FACES_ARCFACE,
36
  SUPABASE_URL, SUPABASE_SERVICE_KEY,
37
  CLUSTER_MIN_SAMPLES, CLUSTER_MIN_CLUSTER_SIZE, CLUSTER_EPSILON,
38
+ FACE_SEARCH_TOP_K, CLUSTERING_BLUR_THRESHOLD,
39
  )
40
 
41
 
 
193
 
194
  ids = [r["id"] for r in raw]
195
  metas = [r["metadata"] for r in raw]
196
+
197
+ # Filter out blurry faces before clustering
198
+ valid_indices = [i for i, meta in enumerate(metas) if meta.get("blur_score", 100.0) >= CLUSTERING_BLUR_THRESHOLD]
199
+
200
+ if len(valid_indices) < CLUSTER_MIN_CLUSTER_SIZE:
201
+ return {"status": "skipped", "reason": f"only {len(valid_indices)} non-blurry vectors after blur filtering", "vectors": len(raw), "valid_vectors": len(valid_indices)}
202
+
203
+ ids = [ids[i] for i in valid_indices]
204
+ metas = [metas[i] for i in valid_indices]
205
+ raw_values = [r["values"] for r in raw]
206
+ matrix = np.array([raw_values[i] for i in valid_indices], dtype=np.float32)
207
 
208
  # L2-normalise before euclidean HDBSCAN (equivalent to angular distance)
209
  norms = np.linalg.norm(matrix, axis=1, keepdims=True)