Spaces:
Running
Running
Feat: enhancing people
Browse files- src/core/config.py +4 -3
- src/services/clustering.py +12 -2
src/core/config.py
CHANGED
|
@@ -64,9 +64,10 @@ MAX_FACES_PER_IMAGE = int(os.getenv("MAX_FACES_PER_IMAGE", "20"))
|
|
| 64 |
FACE_QUALITY_GATE = float(os.getenv("FACE_QUALITY_GATE", "0.3"))
|
| 65 |
|
| 66 |
# Laplacian variance blur threshold for face crops.
|
| 67 |
-
# Faces below this score are excluded from search results.
|
| 68 |
# Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
|
| 69 |
FACE_BLUR_THRESHOLD = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))
|
|
|
|
| 70 |
|
| 71 |
# ──────────────────────────────────────────────────────────────
|
| 72 |
# Embedding dimensions
|
|
@@ -155,8 +156,8 @@ USE_ASYNC_UPLOADS = int(os.getenv("USE_ASYNC_UPLOADS", "1"))
|
|
| 155 |
USE_CLUSTER_AWARE_SEARCH = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))
|
| 156 |
|
| 157 |
# HDBSCAN parameters — tuned for typical 1k–10k image libraries
|
| 158 |
-
CLUSTER_MIN_SAMPLES = int(os.getenv("CLUSTER_MIN_SAMPLES", "3"))
|
| 159 |
-
CLUSTER_MIN_CLUSTER_SIZE = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "3"))
|
| 160 |
CLUSTER_EPSILON = float(os.getenv("CLUSTER_EPSILON", "0.35"))
|
| 161 |
|
| 162 |
# Auto re-cluster after every N new face uploads (0 = disabled, manual only)
|
|
|
|
| 64 |
FACE_QUALITY_GATE = float(os.getenv("FACE_QUALITY_GATE", "0.3"))
|
| 65 |
|
| 66 |
# Laplacian variance blur threshold for face crops.
|
| 67 |
+
# Faces below this score are excluded from search results; clustering applies its own, more lenient cutoff (CLUSTERING_BLUR_THRESHOLD, below).
|
| 68 |
# Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
|
| 69 |
FACE_BLUR_THRESHOLD = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))
|
| 70 |
+
CLUSTERING_BLUR_THRESHOLD = float(os.getenv("CLUSTERING_BLUR_THRESHOLD", "30.0")) # Slightly more lenient for clustering
|
| 71 |
|
| 72 |
# ──────────────────────────────────────────────────────────────
|
| 73 |
# Embedding dimensions
|
|
|
|
| 156 |
USE_CLUSTER_AWARE_SEARCH = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))
|
| 157 |
|
| 158 |
# HDBSCAN parameters — tuned for typical 1k–10k image libraries
|
| 159 |
+
CLUSTER_MIN_SAMPLES = int(os.getenv("CLUSTER_MIN_SAMPLES", "2")) # Lowered from 3 to include pairs
|
| 160 |
+
CLUSTER_MIN_CLUSTER_SIZE = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "2")) # Lowered from 3 to 2
|
| 161 |
CLUSTER_EPSILON = float(os.getenv("CLUSTER_EPSILON", "0.35"))
|
| 162 |
|
| 163 |
# Auto re-cluster after every N new face uploads (0 = disabled, manual only)
|
src/services/clustering.py
CHANGED
|
@@ -35,7 +35,7 @@ from src.core.config import (
|
|
| 35 |
IDX_FACES_ARCFACE,
|
| 36 |
SUPABASE_URL, SUPABASE_SERVICE_KEY,
|
| 37 |
CLUSTER_MIN_SAMPLES, CLUSTER_MIN_CLUSTER_SIZE, CLUSTER_EPSILON,
|
| 38 |
-
FACE_SEARCH_TOP_K,
|
| 39 |
)
|
| 40 |
|
| 41 |
|
|
@@ -193,7 +193,17 @@ async def run_clustering(pc, user_id: str) -> dict:
|
|
| 193 |
|
| 194 |
ids = [r["id"] for r in raw]
|
| 195 |
metas = [r["metadata"] for r in raw]
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
# L2-normalise before euclidean HDBSCAN (equivalent to angular distance)
|
| 199 |
norms = np.linalg.norm(matrix, axis=1, keepdims=True)
|
|
|
|
| 35 |
IDX_FACES_ARCFACE,
|
| 36 |
SUPABASE_URL, SUPABASE_SERVICE_KEY,
|
| 37 |
CLUSTER_MIN_SAMPLES, CLUSTER_MIN_CLUSTER_SIZE, CLUSTER_EPSILON,
|
| 38 |
+
FACE_SEARCH_TOP_K, CLUSTERING_BLUR_THRESHOLD,
|
| 39 |
)
|
| 40 |
|
| 41 |
|
|
|
|
| 193 |
|
| 194 |
ids = [r["id"] for r in raw]
|
| 195 |
metas = [r["metadata"] for r in raw]
|
| 196 |
+
|
| 197 |
+
# Filter out blurry faces before clustering
|
| 198 |
+
valid_indices = [i for i, meta in enumerate(metas) if meta.get("blur_score", 100.0) >= CLUSTERING_BLUR_THRESHOLD]
|
| 199 |
+
|
| 200 |
+
if len(valid_indices) < CLUSTER_MIN_CLUSTER_SIZE:
|
| 201 |
+
return {"status": "skipped", "reason": f"only {len(valid_indices)} non-blurry vectors after blur filtering", "vectors": len(raw), "valid_vectors": len(valid_indices)}
|
| 202 |
+
|
| 203 |
+
ids = [ids[i] for i in valid_indices]
|
| 204 |
+
metas = [metas[i] for i in valid_indices]
|
| 205 |
+
raw_values = [r["values"] for r in raw]
|
| 206 |
+
matrix = np.array([raw_values[i] for i in valid_indices], dtype=np.float32)
|
| 207 |
|
| 208 |
# L2-normalise before euclidean HDBSCAN (equivalent to angular distance)
|
| 209 |
norms = np.linalg.norm(matrix, axis=1, keepdims=True)
|