DepthLens / src /config.py
Rishabh Jain
Initial upload — depth-aware scene description system
5412d82
"""
Central configuration for the Depth-Aware Scene Description system.
All constants, paths, and hyperparameters live here.
"""
from pathlib import Path
# ── Paths ──
# Project root is the directory two levels above this file. The path is
# resolved first so the result is absolute: with a relative ``__file__``
# (e.g. a bare "config.py") ``.parent.parent`` would collapse to "." and
# every directory derived below would point at the wrong location.
PROJECT_ROOT = Path(__file__).resolve().parent.parent

# Input data tree.
DATA_DIR = PROJECT_ROOT / "data"
TEST_IMAGES_DIR = DATA_DIR / "test_images"
GROUND_TRUTH_DIR = DATA_DIR / "ground_truth"
REFERENCES_DIR = DATA_DIR / "references"
ARKITSCENES_TMP_DIR = DATA_DIR / "_arkit_tmp"

# Output tree.
OUTPUTS_DIR = PROJECT_ROOT / "outputs"
RESULTS_DIR = OUTPUTS_DIR / "results"
# ── Camera ──
# Horizontal field of view in degrees; the value is a typical smartphone figure.
HFOV_DEG = 70
# Lower bound of the mapped depth range, in centimetres.
DEPTH_MIN_CM = 20
# Upper bound of the mapped depth range, in centimetres.
DEPTH_MAX_CM = 200
# ── Models ──
# Model identifiers and checkpoint file names.
YOLO_MODEL = "yolov8n.pt"
DEPTH_MODEL = "depth-anything/Depth-Anything-V2-Small-hf"
MOBILE_SAM_CKPT = "mobile_sam.pt"
# URL of the mobile_sam.pt weights file in the MobileSAM GitHub repository.
MOBILE_SAM_URL = (
    "https://github.com/ChaoningZhang/MobileSAM/raw/master/weights/"
    "mobile_sam.pt"
)

# Moondream (Colab T4)
MOONDREAM_ID = "vikhyatk/moondream2"
MOONDREAM_REVISION = "2025-06-21"

# Qwen (RTX 5060)
QWEN_ID = "Qwen/Qwen2.5-VL-3B-Instruct"
# Pixel bounds, written as 28 * 28 pixels times a token count.
QWEN_MIN_PIXELS = 28 * 28 * 256  # 256 tokens
QWEN_MAX_PIXELS = 28 * 28 * 512  # 512 tokens

# Gemma 4 E2B IT (production — requires transformers>=5.5.0)
GEMMA4_ID = "google/gemma-4-E2B-it"
GEMMA4_MAX_NEW_TOKENS = 300
# ── Detection ──
# NOTE(review): presumably the minimum detector confidence to keep a
# detection — confirm against the code that consumes it.
CONF_THRESHOLD = 0.3

# ── Evaluation ──
# Counts used by the evaluation runs (images, spatial scenes, sized objects,
# latency repetitions) — naming-based reading; verify against the callers.
N_TEST_IMAGES = 50
N_SPATIAL_SCENES = 30
N_SIZE_OBJECTS = 20
N_LATENCY_RUNS = 10
# ── Assistive prompt templates ──
# Full-scene prompt, assembled from its clauses with single spaces between them.
ASSISTIVE_PROMPT = " ".join((
    "Describe this scene for a visually impaired person.",
    "Use the depth measurements provided to give a spatial description:",
    "how deep or large the space appears to be,",
    "what objects are present and whether each one is to the left, centre, or right,",
    "roughly how far away each object is,",
    "and any navigation hazards or obstacles.",
    "Be concise and natural, as if guiding someone who cannot see.",
))

# Prompt text keyed by query mode name; "full_scene" reuses ASSISTIVE_PROMPT.
QUERY_MODES = dict(
    identify="What is this object? Provide a one-sentence identification.",
    describe=(
        "Describe this object for a visually impaired user: "
        "material, condition, any visible text or branding, "
        "and one practical detail."
    ),
    spatial=(
        "Describe the spatial arrangement of all objects in the scene using "
        "the depth measurements provided. "
        "Use directions relative to the viewer (ahead, left, right) "
        "and include distances."
    ),
    measure=(
        "Using the estimated physical dimensions provided, "
        "describe how large this object is. "
        "Compare its size to a common everyday reference object "
        "a visually impaired person would know by touch."
    ),
    navigate=(
        "Describe this scene as navigation guidance "
        "for a visually impaired person. "
        "Mention obstacles, clear paths, distances to key objects, "
        "and any potential hazards like edges or steps."
    ),
    full_scene=ASSISTIVE_PROMPT,
)