# visual-search-api / src/core/config.py
# (header recovered from hosting-page scrape — author: AdarshDRC,
#  commit dfc44c0 "Implementing pagination")
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment (no-op if
# the file is absent).  Must run before any of the os.getenv() calls below.
load_dotenv()

# This module defines the project-wide configuration constants used throughout
# the service.  Every value is read once at import time from the environment
# (HF Space secrets / .env), falling back to the documented default.
# ===============================================================
# Credentials / Secrets (set in HF Space secrets) / KEYS / URLS
# Every value falls back to "" when the variable is unset.
# ===============================================================
# Pinecone + Cloudinary credentials (set in HF Space secrets)
DEFAULT_PINECONE_KEY: str = os.getenv("DEFAULT_PINECONE_KEY", "")
DEFAULT_CLOUDINARY_URL: str = os.getenv("DEFAULT_CLOUDINARY_URL", "")
# Supabase credentials
SUPABASE_URL: str = os.getenv("SUPABASE_URL", "")
SUPABASE_SERVICE_KEY: str = os.getenv("SUPABASE_SERVICE_KEY", "")
# Hugging Face credentials
HF_TOKEN: str = os.getenv("HF_TOKEN", "")
# Upstash Redis credentials (used by the Phase 3 cache / job queue below)
UPSTASH_REDIS_URL: str = os.getenv("UPSTASH_REDIS_URL", "")
UPSTASH_REDIS_TOKEN: str = os.getenv("UPSTASH_REDIS_TOKEN", "")
# ===============================================================
# Configurable settings (override via HF Space environment vars)
# ===============================================================
# Index names for the embeddings generated by the DINOv2 and SigLIP models.
IDX_FACES = os.environ.get("IDX_FACES", "enterprise-faces")
IDX_OBJECTS = os.environ.get("IDX_OBJECTS", "enterprise-objects")
# Index names for the embeddings generated by the ArcFace and AdaFace models.
IDX_FACES_ARCFACE = os.environ.get("IDX_FACES_ARCFACE", "faces-arcface")
IDX_FACES_ADAFACE = os.environ.get("IDX_FACES_ADAFACE", "faces-adaface")
# ──────────────────────────────────────────────────────────────
# Concurrency / limits — knobs for speeding up inference
# ──────────────────────────────────────────────────────────────
# Cap on simultaneous model inferences.
MAX_CONCURRENT_INFERENCES = int(os.environ.get("MAX_CONCURRENT_INFERENCES", "2"))
# Cap on files accepted in a single upload request.
MAX_FILES_PER_UPLOAD = int(os.environ.get("MAX_FILES_PER_UPLOAD", "50"))
# Number of entries retained by the inference cache.
INFERENCE_CACHE_SIZE = int(os.environ.get("INFERENCE_CACHE_SIZE", "128"))
# ──────────────────────────────────────────────────────────────
# Image / detection
# ──────────────────────────────────────────────────────────────
# Maximum image edge length in px — presumably used to downscale before
# inference; confirm against the preprocessing caller.
MAX_IMAGE_SIZE: int = int(os.getenv("MAX_IMAGE_SIZE", "1024"))
# Maximum number of detected crops kept per image.
MAX_CROPS: int = int(os.getenv("MAX_CROPS", "10"))
# Class id 0 is "person" in the COCO label set YOLO models are trained on.
YOLO_PERSON_CLASS_ID: int = 0
# Minimum crop edge (px) — detections smaller than this are presumably skipped.
YOLO_MIN_CROP_PX: int = int(os.getenv("YOLO_MIN_CROP_PX", "50"))
# Detector confidence cutoff.
YOLO_CONF_THRESHOLD: float = float(os.getenv("YOLO_CONF_THRESHOLD", "0.25"))
# Primary detector input size; DET_SCALES lists the sizes (largest first) used
# when multi-scale fallback is enabled (see ENABLE_MULTI_SCALE_FALLBACK below).
DET_SIZE_PRIMARY = (640, 640)
DET_SCALES = [(1280, 1280), (960, 960), (640, 640)]
# IoU above which two detections count as duplicates for de-duplication.
IOU_DEDUP_THRESHOLD: float = float(os.getenv("IOU_DEDUP_THRESHOLD", "0.4"))
# Minimum face edge (px) and per-image face cap.
MIN_FACE_SIZE: int = int(os.getenv("MIN_FACE_SIZE", "30"))
MAX_FACES_PER_IMAGE: int = int(os.getenv("MAX_FACES_PER_IMAGE", "20"))
# Phase 2: relaxed from 0.5 → 0.3 to index more faces (filter at query time)
FACE_QUALITY_GATE: float = float(os.getenv("FACE_QUALITY_GATE", "0.3"))
# Laplacian variance blur threshold for face crops.
# Faces below this score are excluded from search results AND clustering.
# Typical values: >100 = sharp, 50-100 = acceptable, <50 = blurry.
FACE_BLUR_THRESHOLD: float = float(os.getenv("FACE_BLUR_THRESHOLD", "50.0"))
CLUSTERING_BLUR_THRESHOLD: float = float(os.getenv("CLUSTERING_BLUR_THRESHOLD", "30.0"))  # Slightly more lenient for clustering
# ──────────────────────────────────────────────────────────────
# Embedding dimensions / face-crop settings
# ──────────────────────────────────────────────────────────────
# Per-model embedding widths; the fused width is their sum (presumably an
# ArcFace ++ AdaFace concatenation — confirm in the fusion code).
FACE_DIM = 512
ADAFACE_DIM = 512
FUSED_FACE_DIM = FACE_DIM + ADAFACE_DIM  # 1024
# Stored face-crop thumbnail edge (px) and save quality.
FACE_CROP_THUMB_SIZE = int(os.environ.get("FACE_CROP_THUMB_SIZE", "112"))
FACE_CROP_QUALITY = int(os.environ.get("FACE_CROP_QUALITY", "85"))
# Fractional padding added around a detected box before cropping,
# per embedding model.
FACE_CROP_PADDING = float(os.environ.get("FACE_CROP_PADDING", "0.2"))
ADAFACE_CROP_PADDING = float(os.environ.get("ADAFACE_CROP_PADDING", "0.1"))
# 0/1 toggle: run the AdaFace model at all.
ENABLE_ADAFACE = int(os.environ.get("ENABLE_ADAFACE", "1"))
# ──────────────────────────────────────────────────────────────
# Phase 1: Speed flags (unchanged, leaving on)
# ──────────────────────────────────────────────────────────────
# 0/1: use ONNX-exported vision models instead of the originals.
USE_ONNX_VISION: int = int(os.getenv("USE_ONNX_VISION", "0"))
# Directory holding the exported ONNX model files.
ONNX_MODELS_DIR: str = os.getenv("ONNX_MODELS_DIR", "onnx_models")
# 0/1: prefer INT8-quantized ONNX models.
ONNX_USE_INT8: int = int(os.getenv("ONNX_USE_INT8", "1"))
# 0/1: presumably retries detection at the extra DET_SCALES sizes when the
# primary pass finds nothing — confirm in the detection code.
ENABLE_MULTI_SCALE_FALLBACK: int = int(os.getenv("ENABLE_MULTI_SCALE_FALLBACK", "1"))
ENABLE_HORIZONTAL_FLIP: int = int(os.getenv("ENABLE_HORIZONTAL_FLIP", "0"))
# Thread counts for OpenMP / MKL.  NOTE(review): reading these into Python
# constants does not by itself configure the native libraries — presumably
# they are applied (via os.environ / torch) elsewhere; confirm.
OMP_NUM_THREADS: int = int(os.getenv("OMP_NUM_THREADS", "2"))
MKL_NUM_THREADS: int = int(os.getenv("MKL_NUM_THREADS", "2"))
# ──────────────────────────────────────────────────────────────
# Phase 2: Recall flags — DEFAULT ON
# ──────────────────────────────────────────────────────────────
# Split-index mode (0/1): write ArcFace + AdaFace to separate indexes
# (IDX_FACES_ARCFACE / IDX_FACES_ADAFACE above), score-fuse at query time.
USE_SPLIT_FACE_INDEXES: int = int(os.getenv("USE_SPLIT_FACE_INDEXES", "1"))
# Score fusion weights. ArcFace is more discriminative for generic faces;
# AdaFace helps with low-quality/extreme-angle cases. 0.6/0.4 is NIST-FRVT standard.
ARCFACE_WEIGHT: float = float(os.getenv("ARCFACE_WEIGHT", "0.6"))
ADAFACE_WEIGHT: float = float(os.getenv("ADAFACE_WEIGHT", "0.4"))
# ArcFace-R100 same-person mean ~0.55, std ~0.12.
# Lowered from 0.30 to 0.22 for better recall on same-person photos.
# Still well above the impostor tail (different-person mean ~0.05, std ~0.08).
FACE_MATCH_THRESHOLD: float = float(os.getenv("FACE_MATCH_THRESHOLD", "0.22"))
# With both models agreeing, a fused 0.26 ≈ arc 0.22 + ada 0.30 together.
# Lower threshold when both models agree = more complete galleries.
FUSED_MATCH_THRESHOLD: float = float(os.getenv("FUSED_MATCH_THRESHOLD", "0.26"))
# ArcFace-only floor (no AdaFace confirmation available).
# Lowered from 0.38 to 0.28 — still strict enough to reject impostors while
# capturing same-person photos across diverse angles/lighting.
ARCFACE_SOLO_THRESHOLD: float = float(os.getenv("ARCFACE_SOLO_THRESHOLD", "0.28"))
# Query-time augmentation (0/1): OFF by default, enabled via deep_search form flag.
ENABLE_QUERY_TIME_AUG: int = int(os.getenv("ENABLE_QUERY_TIME_AUG", "0"))
# Larger top_k: was 50, now 500 so large galleries aren't truncated.
FACE_SEARCH_TOP_K: int = int(os.getenv("FACE_SEARCH_TOP_K", "500"))
OBJECT_SEARCH_TOP_K: int = int(os.getenv("OBJECT_SEARCH_TOP_K", "100"))
# Final API returns at most this many per-face matches (after dedup).
# Increased from 200 to 500 to show complete photo galleries.
FACE_RESULTS_PER_QUERY_CAP: int = int(os.getenv("FACE_RESULTS_PER_QUERY_CAP", "500"))
# ──────────────────────────────────────────────────────────────
# Phase 3: People View + Job Queue — defaults vary per flag; see each one.
# ──────────────────────────────────────────────────────────────
# Redis-backed inference cache + job queue (requires Upstash).
# Set UPSTASH_REDIS_URL + UPSTASH_REDIS_TOKEN in HF Space secrets.
# Master toggle (0/1): enable the persistent Redis cache (replaces the
# in-memory dict).  Falls back to in-memory if UPSTASH_REDIS_URL is not set,
# so enabling it before Upstash is wired up is safe.
# NOTE(review): default is OFF ("0"), contrary to the earlier comment that
# suggested it was left on.
USE_REDIS_CACHE: int = int(os.getenv("USE_REDIS_CACHE", "0"))
# Async upload mode (0/1, default ON): POST /api/upload?async=true returns a
# job_id immediately and processes in the background worker.
# Synchronous uploads (no ?async param) always work regardless of this flag.
USE_ASYNC_UPLOADS: int = int(os.getenv("USE_ASYNC_UPLOADS", "1"))
# Cluster-aware search expansion (0/1, default ON): after the initial face
# search, expand results to include ALL images in the matched identity
# clusters.  Near-100% recall for well-indexed people. Disable if Supabase is slow.
USE_CLUSTER_AWARE_SEARCH: int = int(os.getenv("USE_CLUSTER_AWARE_SEARCH", "1"))
# HDBSCAN parameters — tuned for typical 1k–10k image libraries
CLUSTER_MIN_SAMPLES: int = int(os.getenv("CLUSTER_MIN_SAMPLES", "5")) # Increased to 5 for stricter clustering, fewer duplicates
CLUSTER_MIN_CLUSTER_SIZE: int = int(os.getenv("CLUSTER_MIN_CLUSTER_SIZE", "5")) # Increased from 2 to 5, require 5+ faces per cluster
CLUSTER_EPSILON: float = float(os.getenv("CLUSTER_EPSILON", "0.20")) # Tightened from 0.35 to 0.20 to reduce duplicate clusters
# Auto re-cluster after every N new face uploads (0 = disabled, manual only)
CLUSTER_AUTO_TRIGGER_EVERY: int = int(os.getenv("CLUSTER_AUTO_TRIGGER_EVERY", "0"))