File size: 9,452 Bytes
29bfc1f
 
 
 
 
49be4be
a7e7d81
49be4be
 
 
 
 
 
 
29bfc1f
 
49be4be
 
 
 
 
 
 
 
 
 
 
 
 
29bfc1f
49be4be
29bfc1f
 
 
49be4be
29bfc1f
 
 
 
49be4be
29bfc1f
 
 
 
 
49be4be
29bfc1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
725ae84
29bfc1f
 
725ae84
29bfc1f
 
 
 
 
 
a7e7d81
29bfc1f
 
 
 
 
 
 
49be4be
29bfc1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6a1ae64
 
 
29bfc1f
6a1ae64
 
 
29bfc1f
 
6a1ae64
 
 
29bfc1f
 
 
 
 
 
 
 
 
6a1ae64
 
29bfc1f
 
 
 
 
 
 
49be4be
29bfc1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfc44c0
 
 
29bfc1f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""Project-wide configuration constants.

This module loads environment variables (via python-dotenv) and exposes them
as module-level constants used throughout the project: credentials/secrets,
index names, concurrency limits, image/detection settings, embedding
dimensions, and feature flags.
"""

import os

from dotenv import load_dotenv

# Populate os.environ from a local .env file (no-op if the file is absent).
load_dotenv()


# ===============================================================
# Credentials / secrets (set in HF Space secrets) / keys / URLs
# ===============================================================

# Pinecone + Cloudinary credentials (set in HF Space secrets)
DEFAULT_PINECONE_KEY = os.environ.get("DEFAULT_PINECONE_KEY", "")
DEFAULT_CLOUDINARY_URL = os.environ.get("DEFAULT_CLOUDINARY_URL", "")

# Supabase credentials
SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
SUPABASE_SERVICE_KEY = os.environ.get("SUPABASE_SERVICE_KEY", "")

# Hugging Face access token
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Upstash Redis credentials
UPSTASH_REDIS_URL = os.environ.get("UPSTASH_REDIS_URL", "")
UPSTASH_REDIS_TOKEN = os.environ.get("UPSTASH_REDIS_TOKEN", "")

# ===============================================================
# Index names — configurable via HF Space variables
# ===============================================================

# Indexes holding embeddings generated by the DINOv2 and SigLIP models.
IDX_FACES = os.environ.get("IDX_FACES", "enterprise-faces")
IDX_OBJECTS = os.environ.get("IDX_OBJECTS", "enterprise-objects")

# Indexes holding embeddings generated by the ArcFace and AdaFace models.
IDX_FACES_ARCFACE = os.environ.get("IDX_FACES_ARCFACE", "faces-arcface")
IDX_FACES_ADAFACE = os.environ.get("IDX_FACES_ADAFACE", "faces-adaface")

# ──────────────────────────────────────────────────────────────
# Concurrency / limits — used to speed up inference
# ──────────────────────────────────────────────────────────────
MAX_CONCURRENT_INFERENCES = int(os.environ.get("MAX_CONCURRENT_INFERENCES", "2"))
MAX_FILES_PER_UPLOAD = int(os.environ.get("MAX_FILES_PER_UPLOAD", "50"))
INFERENCE_CACHE_SIZE = int(os.environ.get("INFERENCE_CACHE_SIZE", "128"))


# ──────────────────────────────────────────────────────────────
# Image / detection
# ──────────────────────────────────────────────────────────────
MAX_IMAGE_SIZE = int(os.environ.get("MAX_IMAGE_SIZE", "1024"))
MAX_CROPS = int(os.environ.get("MAX_CROPS", "10"))

# Person class id for YOLO detections (0 in the COCO-style label set — TODO
# confirm against the model actually deployed).
YOLO_PERSON_CLASS_ID = 0
YOLO_MIN_CROP_PX = int(os.environ.get("YOLO_MIN_CROP_PX", "50"))
YOLO_CONF_THRESHOLD = float(os.environ.get("YOLO_CONF_THRESHOLD", "0.25"))

# Primary detector input size, plus the multi-scale ladder (largest first).
DET_SIZE_PRIMARY = (640, 640)
DET_SCALES = [(1280, 1280), (960, 960), (640, 640)]

IOU_DEDUP_THRESHOLD = float(os.environ.get("IOU_DEDUP_THRESHOLD", "0.4"))
MIN_FACE_SIZE = int(os.environ.get("MIN_FACE_SIZE", "30"))
MAX_FACES_PER_IMAGE = int(os.environ.get("MAX_FACES_PER_IMAGE", "20"))

# Phase 2: relaxed from 0.5 -> 0.3 to index more faces (filter at query time).
FACE_QUALITY_GATE = float(os.environ.get("FACE_QUALITY_GATE", "0.3"))

# Laplacian-variance blur threshold for face crops. Faces scoring below this
# are excluded from search results AND clustering.
# Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
FACE_BLUR_THRESHOLD = float(os.environ.get("FACE_BLUR_THRESHOLD", "50.0"))
# Slightly more lenient cutoff when clustering.
CLUSTERING_BLUR_THRESHOLD = float(os.environ.get("CLUSTERING_BLUR_THRESHOLD", "30.0"))

# ──────────────────────────────────────────────────────────────
# Embedding dimensions
# ──────────────────────────────────────────────────────────────
FACE_DIM = 512     # per-model face embedding size (presumably ArcFace — confirm)
ADAFACE_DIM = 512  # AdaFace embedding size
# Dimension of the fused face embedding. Derived (FACE_DIM + ADAFACE_DIM) so
# the three values cannot drift out of sync; still 1024, unchanged.
FUSED_FACE_DIM = FACE_DIM + ADAFACE_DIM

# Face-crop thumbnail settings (size in px, JPEG-style quality, padding as a
# fraction of the detected box).
FACE_CROP_THUMB_SIZE = int(os.getenv("FACE_CROP_THUMB_SIZE", "112"))
FACE_CROP_QUALITY = int(os.getenv("FACE_CROP_QUALITY", "85"))
FACE_CROP_PADDING = float(os.getenv("FACE_CROP_PADDING", "0.2"))
ADAFACE_CROP_PADDING = float(os.getenv("ADAFACE_CROP_PADDING", "0.1"))

# Toggle for the AdaFace pipeline (1 = enabled).
ENABLE_ADAFACE = int(os.getenv("ENABLE_ADAFACE", "1"))

# ──────────────────────────────────────────────────────────────
# Phase 1: speed flags (unchanged, leaving on)
# ──────────────────────────────────────────────────────────────
USE_ONNX_VISION = int(os.environ.get("USE_ONNX_VISION", "0"))
ONNX_MODELS_DIR = os.environ.get("ONNX_MODELS_DIR", "onnx_models")
ONNX_USE_INT8 = int(os.environ.get("ONNX_USE_INT8", "1"))
ENABLE_MULTI_SCALE_FALLBACK = int(os.environ.get("ENABLE_MULTI_SCALE_FALLBACK", "1"))
ENABLE_HORIZONTAL_FLIP = int(os.environ.get("ENABLE_HORIZONTAL_FLIP", "0"))
# Thread caps for OpenMP / MKL backed libraries.
# NOTE(review): reading these into Python names does not itself set the env
# vars for native libs — confirm they are exported/applied elsewhere.
OMP_NUM_THREADS = int(os.environ.get("OMP_NUM_THREADS", "2"))
MKL_NUM_THREADS = int(os.environ.get("MKL_NUM_THREADS", "2"))

# ──────────────────────────────────────────────────────────────
# Phase 2: recall flags — DEFAULT ON
# ──────────────────────────────────────────────────────────────

# Split-index mode: write ArcFace + AdaFace vectors to separate indexes and
# fuse the scores at query time.
USE_SPLIT_FACE_INDEXES = int(os.environ.get("USE_SPLIT_FACE_INDEXES", "1"))

# Score-fusion weights. ArcFace is more discriminative for generic faces;
# AdaFace helps with low-quality / extreme-angle cases. The 0.6/0.4 split
# is the NIST-FRVT standard.
ARCFACE_WEIGHT = float(os.environ.get("ARCFACE_WEIGHT", "0.6"))
ADAFACE_WEIGHT = float(os.environ.get("ADAFACE_WEIGHT", "0.4"))

# ArcFace-R100 same-person similarity: mean ~0.55, std ~0.12. Lowered from
# 0.30 to 0.22 for better recall on same-person photos — still well above
# the impostor tail (different-person mean ~0.05, std ~0.08).
FACE_MATCH_THRESHOLD = float(os.environ.get("FACE_MATCH_THRESHOLD", "0.22"))

# When both models agree, a fused score of 0.26 ≈ arc 0.22 + ada 0.30
# together; the lower bar on agreement yields more complete galleries.
FUSED_MATCH_THRESHOLD = float(os.environ.get("FUSED_MATCH_THRESHOLD", "0.26"))

# ArcFace-only floor (no AdaFace confirmation available). Lowered from 0.38
# to 0.28 — still strict enough to reject impostors while capturing
# same-person photos across diverse angles/lighting.
ARCFACE_SOLO_THRESHOLD = float(os.environ.get("ARCFACE_SOLO_THRESHOLD", "0.28"))

# Query-time augmentation: off by default; enabled via the deep_search form flag.
ENABLE_QUERY_TIME_AUG = int(os.environ.get("ENABLE_QUERY_TIME_AUG", "0"))

# Larger top_k: was 50, now 500 so large galleries aren't truncated.
FACE_SEARCH_TOP_K = int(os.environ.get("FACE_SEARCH_TOP_K", "500"))
OBJECT_SEARCH_TOP_K = int(os.environ.get("OBJECT_SEARCH_TOP_K", "100"))

# The final API returns at most this many per-face matches (after dedup).
# Raised from 200 to 500 to show complete photo galleries.
FACE_RESULTS_PER_QUERY_CAP = int(os.environ.get("FACE_RESULTS_PER_QUERY_CAP", "500"))

# ──────────────────────────────────────────────────────────────
# Phase 3: People View + job queue
# ──────────────────────────────────────────────────────────────
# Redis-backed inference cache + job queue (requires Upstash).
# Set UPSTASH_REDIS_URL + UPSTASH_REDIS_TOKEN in HF Space secrets.

# Master toggle for the persistent Redis cache (replaces the in-memory dict).
# Falls back to in-memory storage when UPSTASH_REDIS_URL is unset, so turning
# it on before Upstash is wired up is harmless. Default: off ("0").
USE_REDIS_CACHE = int(os.environ.get("USE_REDIS_CACHE", "0"))

# Async upload mode: when truthy, POST /api/upload?async=true returns a
# job_id immediately and processing happens in the background worker.
# Synchronous uploads (no ?async param) always work regardless of this flag.
# Default: on ("1").
USE_ASYNC_UPLOADS = int(os.environ.get("USE_ASYNC_UPLOADS", "1"))

# Cluster-aware search expansion: after the initial face search, expand the
# results to include ALL images in the matched identity clusters. Near-100%
# recall for well-indexed people; disable if Supabase is slow. Default: on.
USE_CLUSTER_AWARE_SEARCH = int(os.environ.get("USE_CLUSTER_AWARE_SEARCH", "1"))

# HDBSCAN parameters — tuned for typical 1k–10k image libraries.
# min_samples raised to 5 for stricter clustering and fewer duplicates.
CLUSTER_MIN_SAMPLES = int(os.environ.get("CLUSTER_MIN_SAMPLES", "5"))
# Raised from 2 to 5: require at least 5 faces per cluster.
CLUSTER_MIN_CLUSTER_SIZE = int(os.environ.get("CLUSTER_MIN_CLUSTER_SIZE", "5"))
# Tightened from 0.35 to 0.20 to reduce duplicate clusters.
CLUSTER_EPSILON = float(os.environ.get("CLUSTER_EPSILON", "0.20"))

# Auto re-cluster after every N new face uploads (0 = disabled, manual only).
CLUSTER_AUTO_TRIGGER_EVERY = int(os.environ.get("CLUSTER_AUTO_TRIGGER_EVERY", "0"))