# visual-search-api / src/core/config.py
# (header recovered from hosting-page scrape — author: AdarshDRC,
#  commit dfc44c0 "Implementing pagination")
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment (no-op if
# the file is absent).  Must run before any of the os.getenv() calls below.
load_dotenv()

# This module defines the project-wide configuration constants used throughout
# the service.  Every value is read once at import time from the environment
# (HF Space secrets / .env), falling back to the documented default.
# ===============================================================
# Credentials / Secrets (set in HF Space secrets) / KEYS / URLS
# Every value falls back to "" when the variable is unset.
# ===============================================================
# Pinecone + Cloudinary credentials (set in HF Space secrets)
DEFAULT_PINECONE_KEY: str = os.getenv("DEFAULT_PINECONE_KEY", "")
DEFAULT_CLOUDINARY_URL: str = os.getenv("DEFAULT_CLOUDINARY_URL", "")
# Supabase credentials
SUPABASE_URL: str = os.getenv("SUPABASE_URL", "")
SUPABASE_SERVICE_KEY: str = os.getenv("SUPABASE_SERVICE_KEY", "")
# Hugging Face credentials
HF_TOKEN: str = os.getenv("HF_TOKEN", "")
# Upstash Redis credentials (used by the Phase 3 cache / job queue below)
UPSTASH_REDIS_URL: str = os.getenv("UPSTASH_REDIS_URL", "")
UPSTASH_REDIS_TOKEN: str = os.getenv("UPSTASH_REDIS_TOKEN", "")
# ===============================================================
# Configurable settings (override via HF Space environment vars)
# ===============================================================
# Index names for the embeddings generated by the DINOv2 and SigLIP models.
IDX_FACES = os.environ.get("IDX_FACES", "enterprise-faces")
IDX_OBJECTS = os.environ.get("IDX_OBJECTS", "enterprise-objects")
# Index names for the embeddings generated by the ArcFace and AdaFace models.
IDX_FACES_ARCFACE = os.environ.get("IDX_FACES_ARCFACE", "faces-arcface")
IDX_FACES_ADAFACE = os.environ.get("IDX_FACES_ADAFACE", "faces-adaface")
# ──────────────────────────────────────────────────────────────
# Concurrency / limits — knobs for speeding up inference
# ──────────────────────────────────────────────────────────────
# Cap on simultaneous model inferences.
MAX_CONCURRENT_INFERENCES = int(os.environ.get("MAX_CONCURRENT_INFERENCES", "2"))
# Cap on files accepted in a single upload request.
MAX_FILES_PER_UPLOAD = int(os.environ.get("MAX_FILES_PER_UPLOAD", "50"))
# Number of entries retained by the inference cache.
INFERENCE_CACHE_SIZE = int(os.environ.get("INFERENCE_CACHE_SIZE", "128"))
# ──────────────────────────────────────────────────────────────
# Image / detection
# ──────────────────────────────────────────────────────────────
# Maximum image edge length in px — presumably used to downscale before
# inference; confirm against the preprocessing caller.
MAX_IMAGE_SIZE: int = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
# Maximum number of detected crops kept per image.
MAX_CROPS: int = int(os.getenv("MAX_CROPS", "10"))
# Class id 0 is "person" in the COCO label set YOLO models are trained on.
YOLO_PERSON_CLASS_ID: int = 0
# Minimum crop edge (px) — detections smaller than this are presumably skipped.
YOLO_MIN_CROP_PX: int = int(os.getenv("YOLO_MIN_CROP_PX", "50"))
# Detector confidence cutoff.
YOLO_CONF_THRESHOLD: float = float(os.getenv("YOLO_CONF_THRESHOLD", "0.25"))
# Primary detector input size; DET_SCALES lists the sizes (largest first) used
# when multi-scale fallback is enabled (see ENABLE_MULTI_SCALE_FALLBACK below).
DET_SIZE_PRIMARY = (640, 640)
DET_SCALES = [(1280, 1280), (960, 960), (640, 640)]
# IoU above which two detections count as duplicates for de-duplication.
IOU_DEDUP_THRESHOLD: float = float(os.getenv("IOU_DEDUP_THRESHOLD", "0.4"))
# Minimum face edge (px) and per-image face cap.
MIN_FACE_SIZE: int = int(os.getenv("MIN_FACE_SIZE", "30"))
MAX_FACES_PER_IMAGE: int = int(os.getenv("MAX_FACES_PER_IMAGE", "20"))
# Phase 2: relaxed from 0.5 → 0.3 to index more faces (filter at query time)
FACE_QUALITY_GATE: float = float(os.getenv("FACE_QUALITY_GATE", "0.3"))
# Laplacian variance blur threshold for face crops.
# Faces below this score are excluded from search results AND clustering.
# Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
FACE_BLUR_THRESHOLD: float = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))
CLUSTERING_BLUR_THRESHOLD: float = float(os.getenv("CLUSTERING_BLUR_THRESHOLD", "30.0"))  # Slightly more lenient for clustering
# ──────────────────────────────────────────────────────────────
# Embedding dimensions / face-crop settings
# ──────────────────────────────────────────────────────────────
# Per-model embedding widths; the fused width is their sum (presumably an
# ArcFace ++ AdaFace concatenation — confirm in the fusion code).
FACE_DIM = 512
ADAFACE_DIM = 512
FUSED_FACE_DIM = FACE_DIM + ADAFACE_DIM  # 1024
# Stored face-crop thumbnail edge (px) and save quality.
FACE_CROP_THUMB_SIZE = int(os.environ.get("FACE_CROP_THUMB_SIZE", "112"))
FACE_CROP_QUALITY = int(os.environ.get("FACE_CROP_QUALITY", "85"))
# Fractional padding added around a detected box before cropping,
# per embedding model.
FACE_CROP_PADDING = float(os.environ.get("FACE_CROP_PADDING", "0.2"))
ADAFACE_CROP_PADDING = float(os.environ.get("ADAFACE_CROP_PADDING", "0.1"))
# 0/1 toggle: run the AdaFace model at all.
ENABLE_ADAFACE = int(os.environ.get("ENABLE_ADAFACE", "1"))
# ──────────────────────────────────────────────────────────────
# Phase 1: Speed flags (unchanged, leaving on)
# ──────────────────────────────────────────────────────────────
# 0/1: use ONNX-exported vision models instead of the originals.
USE_ONNX_VISION: int = int(os.getenv("USE_ONNX_VISION", "0"))
# Directory holding the exported ONNX model files.
ONNX_MODELS_DIR: str = os.getenv("ONNX_MODELS_DIR", "onnx_models")
# 0/1: prefer INT8-quantized ONNX models.
ONNX_USE_INT8: int = int(os.getenv("ONNX_USE_INT8", "1"))
# 0/1: presumably retries detection at the extra DET_SCALES sizes when the
# primary pass finds nothing — confirm in the detection code.
ENABLE_MULTI_SCALE_FALLBACK: int = int(os.getenv("ENABLE_MULTI_SCALE_FALLBACK", "1"))
ENABLE_HORIZONTAL_FLIP: int = int(os.getenv("ENABLE_HORIZONTAL_FLIP", "0"))
# Thread counts for OpenMP / MKL.  NOTE(review): reading these into Python
# constants does not by itself configure the native libraries — presumably
# they are applied (via os.environ / torch) elsewhere; confirm.
OMP_NUM_THREADS: int = int(os.getenv("OMP_NUM_THREADS", "2"))
MKL_NUM_THREADS: int = int(os.getenv("MKL_NUM_THREADS", "2"))
# ──────────────────────────────────────────────────────────────
# Phase 2: Recall flags — DEFAULT ON
# ──────────────────────────────────────────────────────────────
# Split-index mode (0/1): write ArcFace + AdaFace to separate indexes
# (IDX_FACES_ARCFACE / IDX_FACES_ADAFACE above), score-fuse at query time.
USE_SPLIT_FACE_INDEXES: int = int(os.getenv("USE_SPLIT_FACE_INDEXES", "1"))
# Score fusion weights. ArcFace is more discriminative for generic faces;
# AdaFace helps with low-quality/extreme-angle cases. 0.6/0.4 is NIST-FRVT standard.
ARCFACE_WEIGHT: float = float(os.getenv("ARCFACE_WEIGHT", "0.6"))
ADAFACE_WEIGHT: float = float(os.getenv("ADAFACE_WEIGHT", "0.4"))
# ArcFace-R100 same-person mean ~0.55, std ~0.12.
# Lowered from 0.30 to 0.22 for better recall on same-person photos.
# Still well above the impostor tail (different-person mean ~0.05, std ~0.08).
FACE_MATCH_THRESHOLD: float = float(os.getenv("FACE_MATCH_THRESHOLD", "0.22"))
# With both models agreeing, a fused 0.26 ≈ arc 0.22 + ada 0.30 together.
# Lower threshold when both models agree = more complete galleries.
FUSED_MATCH_THRESHOLD: float = float(os.getenv("FUSED_MATCH_THRESHOLD", "0.26"))
# ArcFace-only floor (no AdaFace confirmation available).
# Lowered from 0.38 to 0.28 — still strict enough to reject impostors while
# capturing same-person photos across diverse angles/lighting.
ARCFACE_SOLO_THRESHOLD: float = float(os.getenv("ARCFACE_SOLO_THRESHOLD", "0.28"))
# Query-time augmentation (0/1): OFF by default, enabled via deep_search form flag.
ENABLE_QUERY_TIME_AUG: int = int(os.getenv("ENABLE_QUERY_TIME_AUG", "0"))
# Larger top_k: was 50, now 500 so large galleries aren't truncated.
FACE_SEARCH_TOP_K: int = int(os.getenv("FACE_SEARCH_TOP_K", "500"))
OBJECT_SEARCH_TOP_K: int = int(os.getenv("OBJECT_SEARCH_TOP_K", "100"))
# Final API returns at most this many per-face matches (after dedup).
# Increased from 200 to 500 to show complete photo galleries.
FACE_RESULTS_PER_QUERY_CAP: int = int(os.getenv("FACE_RESULTS_PER_QUERY_CAP", "500"))
# ──────────────────────────────────────────────────────────────
# Phase 3: People View + Job Queue — defaults vary per flag; see each one.
# ──────────────────────────────────────────────────────────────
# Redis-backed inference cache + job queue (requires Upstash).
# Set UPSTASH_REDIS_URL + UPSTASH_REDIS_TOKEN in HF Space secrets.
# Master toggle (0/1): enable the persistent Redis cache (replaces the
# in-memory dict).  Falls back to in-memory if UPSTASH_REDIS_URL is not set,
# so enabling it before Upstash is wired up is safe.
# NOTE(review): default is OFF ("0"), contrary to the earlier comment that
# suggested it was left on.
USE_REDIS_CACHE: int = int(os.getenv("USE_REDIS_CACHE", "0"))
# Async upload mode (0/1, default ON): POST /api/upload?async=true returns a
# job_id immediately and processes in the background worker.
# Synchronous uploads (no ?async param) always work regardless of this flag.
USE_ASYNC_UPLOADS: int = int(os.getenv("USE_ASYNC_UPLOADS", "1"))
# Cluster-aware search expansion (0/1, default ON): after the initial face
# search, expand results to include ALL images in the matched identity
# clusters.  Near-100% recall for well-indexed people. Disable if Supabase is slow.
USE_CLUSTER_AWARE_SEARCH: int = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))
# HDBSCAN parameters — tuned for typical 1k–10k image libraries
CLUSTER_MIN_SAMPLES: int = int(os.getenv("CLUSTER_MIN_SAMPLES", "5")) # Increased to 5 for stricter clustering, fewer duplicates
CLUSTER_MIN_CLUSTER_SIZE: int = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "5")) # Increased from 2 to 5, require 5+ faces per cluster
CLUSTER_EPSILON: float = float(os.getenv("CLUSTER_EPSILON", "0.20")) # Tightened from 0.35 to 0.20 to reduce duplicate clusters
# Auto re-cluster after every N new face uploads (0 = disabled, manual only)
CLUSTER_AUTO_TRIGGER_EVERY: int = int(os.getenv("CLUSTER_AUTO_TRIGGER_EVERY", "0"))