Spaces:
Paused
Paused
Zhen Ye committed on
Commit ·
9ea2cfe
1
Parent(s): 4e93b33
fix: Move CUDA_VISIBLE_DEVICES clearing to module import time
Browse files
- Moved env var clearing to the very top of inference.py, before the torch import
- Removed redundant debug logging and clearing logic from run_inference
- Ensures all 4 GPUs are visible before PyTorch CUDA context initializes
- Fixes HF Spaces issue where CUDA_VISIBLE_DEVICES=0 locks to single GPU
- inference.py +7 -23
inference.py
CHANGED
|
@@ -1,18 +1,15 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import time
|
| 4 |
from threading import RLock, Thread
|
| 5 |
from queue import Queue, PriorityQueue
|
| 6 |
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
| 7 |
|
| 8 |
-
# Must clear CUDA_VISIBLE_DEVICES before importing torch to ensure all GPUs are seen
|
| 9 |
-
if "CUDA_VISIBLE_DEVICES" in os.environ:
|
| 10 |
-
# We only clear it if we are sure we want all GPUs.
|
| 11 |
-
# In HF Spaces 4xGPU, this var might be set to "0" by default?
|
| 12 |
-
# Let's log it first implies we need imports.
|
| 13 |
-
# Just Unset it.
|
| 14 |
-
del os.environ["CUDA_VISIBLE_DEVICES"]
|
| 15 |
-
|
| 16 |
import cv2
|
| 17 |
import numpy as np
|
| 18 |
import torch
|
|
@@ -612,21 +609,8 @@ def run_inference(
|
|
| 612 |
logging.info("Detection queries: %s", queries)
|
| 613 |
active_detector = detector_name or "hf_yolov8"
|
| 614 |
|
| 615 |
-
#
|
| 616 |
-
|
| 617 |
-
# DEBUG: Log current state
|
| 618 |
-
logging.info(f"[DEBUG] PID: {os.getpid()}")
|
| 619 |
-
logging.info(f"[DEBUG] CUDA_VISIBLE_DEVICES before clear: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
|
| 620 |
-
logging.info(f"[DEBUG] torch.cuda.device_count() before clear: {torch.cuda.device_count()}")
|
| 621 |
-
|
| 622 |
-
# Clear CUDA_VISIBLE_DEVICES to ensure we see all GPUs if not already handled
|
| 623 |
-
# This must be done BEFORE any torch.cuda calls in this scope if the env was modified externally
|
| 624 |
-
if "CUDA_VISIBLE_DEVICES" in os.environ:
|
| 625 |
-
logging.info("[DEBUG] Deleting CUDA_VISIBLE_DEVICES from env")
|
| 626 |
-
del os.environ["CUDA_VISIBLE_DEVICES"]
|
| 627 |
-
|
| 628 |
num_gpus = torch.cuda.device_count()
|
| 629 |
-
logging.info(f"[DEBUG] num_gpus after clear: {num_gpus}")
|
| 630 |
detectors = []
|
| 631 |
depth_estimators = []
|
| 632 |
|
|
|
|
| 1 |
+
# CRITICAL: Clear CUDA_VISIBLE_DEVICES BEFORE any imports
|
| 2 |
+
# HF Spaces may set this to "0" dynamically, locking us to a single GPU
|
| 3 |
import os
|
| 4 |
+
if "CUDA_VISIBLE_DEVICES" in os.environ:
|
| 5 |
+
del os.environ["CUDA_VISIBLE_DEVICES"]
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
import time
|
| 9 |
from threading import RLock, Thread
|
| 10 |
from queue import Queue, PriorityQueue
|
| 11 |
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
import cv2
|
| 14 |
import numpy as np
|
| 15 |
import torch
|
|
|
|
| 609 |
logging.info("Detection queries: %s", queries)
|
| 610 |
active_detector = detector_name or "hf_yolov8"
|
| 611 |
|
| 612 |
+
# Parallel Model Loading
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
num_gpus = torch.cuda.device_count()
|
|
|
|
| 614 |
detectors = []
|
| 615 |
depth_estimators = []
|
| 616 |
|