Spaces:
Paused
Paused
Zhen Ye committed on
Commit ·
9ea2cfe
1
Parent(s): 4e93b33
fix: Move CUDA_VISIBLE_DEVICES clearing to module import time
Browse files
- Moved env var clearing to the very top of inference.py, before the torch import
- Removed redundant debug logging and clearing logic from run_inference
- Ensures all 4 GPUs are visible before PyTorch CUDA context initializes
- Fixes HF Spaces issue where CUDA_VISIBLE_DEVICES=0 locks to single GPU
- inference.py +7 -23
inference.py
CHANGED
|
@@ -1,18 +1,15 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import time
|
| 4 |
from threading import RLock, Thread
|
| 5 |
from queue import Queue, PriorityQueue
|
| 6 |
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
| 7 |
|
| 8 |
-
# Must clear CUDA_VISIBLE_DEVICES before importing torch to ensure all GPUs are seen
|
| 9 |
-
if "CUDA_VISIBLE_DEVICES" in os.environ:
|
| 10 |
-
# We only clear it if we are sure we want all GPUs.
|
| 11 |
-
# In HF Spaces 4xGPU, this var might be set to "0" by default?
|
| 12 |
-
# Let's log it first implies we need imports.
|
| 13 |
-
# Just Unset it.
|
| 14 |
-
del os.environ["CUDA_VISIBLE_DEVICES"]
|
| 15 |
-
|
| 16 |
import cv2
|
| 17 |
import numpy as np
|
| 18 |
import torch
|
|
@@ -612,21 +609,8 @@ def run_inference(
|
|
| 612 |
logging.info("Detection queries: %s", queries)
|
| 613 |
active_detector = detector_name or "hf_yolov8"
|
| 614 |
|
| 615 |
-
#
|
| 616 |
-
|
| 617 |
-
# DEBUG: Log current state
|
| 618 |
-
logging.info(f"[DEBUG] PID: {os.getpid()}")
|
| 619 |
-
logging.info(f"[DEBUG] CUDA_VISIBLE_DEVICES before clear: {os.environ.get('CUDA_VISIBLE_DEVICES')}")
|
| 620 |
-
logging.info(f"[DEBUG] torch.cuda.device_count() before clear: {torch.cuda.device_count()}")
|
| 621 |
-
|
| 622 |
-
# Clear CUDA_VISIBLE_DEVICES to ensure we see all GPUs if not already handled
|
| 623 |
-
# This must be done BEFORE any torch.cuda calls in this scope if the env was modified externally
|
| 624 |
-
if "CUDA_VISIBLE_DEVICES" in os.environ:
|
| 625 |
-
logging.info("[DEBUG] Deleting CUDA_VISIBLE_DEVICES from env")
|
| 626 |
-
del os.environ["CUDA_VISIBLE_DEVICES"]
|
| 627 |
-
|
| 628 |
num_gpus = torch.cuda.device_count()
|
| 629 |
-
logging.info(f"[DEBUG] num_gpus after clear: {num_gpus}")
|
| 630 |
detectors = []
|
| 631 |
depth_estimators = []
|
| 632 |
|
|
|
|
| 1 |
+
# CRITICAL: Clear CUDA_VISIBLE_DEVICES BEFORE any imports
|
| 2 |
+
# HF Spaces may set this to "0" dynamically, locking us to a single GPU
|
| 3 |
import os
|
| 4 |
+
if "CUDA_VISIBLE_DEVICES" in os.environ:
|
| 5 |
+
del os.environ["CUDA_VISIBLE_DEVICES"]
|
| 6 |
+
|
| 7 |
+
import logging
|
| 8 |
import time
|
| 9 |
from threading import RLock, Thread
|
| 10 |
from queue import Queue, PriorityQueue
|
| 11 |
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
import cv2
|
| 14 |
import numpy as np
|
| 15 |
import torch
|
|
|
|
| 609 |
logging.info("Detection queries: %s", queries)
|
| 610 |
active_detector = detector_name or "hf_yolov8"
|
| 611 |
|
| 612 |
+
# Parallel Model Loading
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
num_gpus = torch.cuda.device_count()
|
|
|
|
| 614 |
detectors = []
|
| 615 |
depth_estimators = []
|
| 616 |
|