Zhen Ye committed on
Commit
9ea2cfe
·
1 Parent(s): 4e93b33

fix: Move CUDA_VISIBLE_DEVICES clearing to module import time

Browse files

- Moved env var clearing to very top of inference.py before torch import
- Removed redundant debug logging and clearing logic from run_inference
- Ensures all 4 GPUs are visible before PyTorch CUDA context initializes
- Fixes HF Spaces issue where CUDA_VISIBLE_DEVICES=0 locks to single GPU

Files changed (1) hide show
  1. inference.py +7 -23
inference.py CHANGED
@@ -1,18 +1,15 @@
1
- import logging
 
2
  import os
 
 
 
 
3
  import time
4
  from threading import RLock, Thread
5
  from queue import Queue, PriorityQueue
6
  from typing import Any, Dict, List, Optional, Sequence, Tuple
7
 
8
- # Must clear CUDA_VISIBLE_DEVICES before importing torch to ensure all GPUs are seen
9
- if "CUDA_VISIBLE_DEVICES" in os.environ:
10
- # We only clear it if we are sure we want all GPUs.
11
- # In HF Spaces 4xGPU, this var might be set to "0" by default?
12
- # Let's log it first implies we need imports.
13
- # Just Unset it.
14
- del os.environ["CUDA_VISIBLE_DEVICES"]
15
-
16
  import cv2
17
  import numpy as np
18
  import torch
@@ -612,21 +609,8 @@ def run_inference(
612
  logging.info("Detection queries: %s", queries)
613
  active_detector = detector_name or "hf_yolov8"
614
 
615
- # 3. Parallel Model Loading
616
-
617
- # DEBUG: Log current state
618
- logging.info(f"[DEBUG] PID: {os.getpid()}")
619
- logging.info(f"[DEBUG] CUDA_VISIBLE_DEVICES before clear: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
620
- logging.info(f"[DEBUG] torch.cuda.device_count() before clear: {torch.cuda.device_count()}")
621
-
622
- # Clear CUDA_VISIBLE_DEVICES to ensure we see all GPUs if not already handled
623
- # This must be done BEFORE any torch.cuda calls in this scope if the env was modified externally
624
- if "CUDA_VISIBLE_DEVICES" in os.environ:
625
- logging.info("[DEBUG] Deleting CUDA_VISIBLE_DEVICES from env")
626
- del os.environ["CUDA_VISIBLE_DEVICES"]
627
-
628
  num_gpus = torch.cuda.device_count()
629
- logging.info(f"[DEBUG] num_gpus after clear: {num_gpus}")
630
  detectors = []
631
  depth_estimators = []
632
 
 
1
+ # CRITICAL: Clear CUDA_VISIBLE_DEVICES BEFORE any imports
2
+ # HF Spaces may set this to "0" dynamically, locking us to a single GPU
3
  import os
4
+ if "CUDA_VISIBLE_DEVICES" in os.environ:
5
+ del os.environ["CUDA_VISIBLE_DEVICES"]
6
+
7
+ import logging
8
  import time
9
  from threading import RLock, Thread
10
  from queue import Queue, PriorityQueue
11
  from typing import Any, Dict, List, Optional, Sequence, Tuple
12
 
 
 
 
 
 
 
 
 
13
  import cv2
14
  import numpy as np
15
  import torch
 
609
  logging.info("Detection queries: %s", queries)
610
  active_detector = detector_name or "hf_yolov8"
611
 
612
+ # Parallel Model Loading
 
 
 
 
 
 
 
 
 
 
 
 
613
  num_gpus = torch.cuda.device_count()
 
614
  detectors = []
615
  depth_estimators = []
616