C4G-HKUST committed on
Commit
f0617b0
·
1 Parent(s): 3c322ce

Fix Stateless GPU environment: avoid CUDA init in main process, initialize GPU in worker process

Browse files
Files changed (2) hide show
  1. app.py +60 -42
  2. wan/audio2video_multiID.py +4 -4
app.py CHANGED
@@ -384,35 +384,46 @@ def run_graio_demo(args):
384
 
385
  os.makedirs(args.audio_save_dir, exist_ok=True)
386
 
387
- # 运行时动态检测 GPU 可用性(参考 Meigen-MultiTalk)
388
- # 记录 GPU 信息,以便在 worker 进程中使用相同的 GPU
 
389
  gpu_device_id = None
390
  gpu_name = None
391
  gpu_uuid = None
392
 
393
- if torch.cuda.is_available():
394
- try:
395
- num_gpus = torch.cuda.device_count()
396
- if num_gpus > 0:
397
- gpu_device_id = local_rank if world_size > 1 else 0
398
- torch.cuda.set_device(gpu_device_id)
399
- gpu_name = torch.cuda.get_device_name(gpu_device_id)
400
- # 尝试获取 GPU UUID(如果可用)
401
- try:
402
- gpu_uuid = torch.cuda.get_device_properties(gpu_device_id).uuid
403
- except:
404
- pass
405
- logging.info(f"GPU AVAILABLE: {num_gpus} GPU(s), Device ID: {gpu_device_id}, Name: {gpu_name}, UUID: {gpu_uuid}")
406
- device = gpu_device_id
407
- else:
408
- logging.warning("CUDA is available but no GPU devices found. Using CPU.")
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  device = -1 # 使用 CPU
410
- except Exception as e:
411
- logging.warning(f"GPU detection error: {e}. Using CPU.")
412
  device = -1 # 使用 CPU
413
- else:
414
- logging.warning("No CUDA-compatible GPU found. Using CPU (slower).")
415
- device = -1 # 使用 CPU
416
 
417
  logging.info("Creating AnyTalker pipeline.")
418
  # 加载模型
@@ -437,15 +448,17 @@ def run_graio_demo(args):
437
 
438
  def generate_video(img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
439
  sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector):
440
- # 确保使用初始化时记录的 GPU 设备
441
- if gpu_device_id is not None and torch.cuda.is_available():
442
  try:
443
- torch.cuda.set_device(gpu_device_id)
 
 
444
  current_device = torch.cuda.current_device()
445
  current_gpu_name = torch.cuda.get_device_name(current_device)
446
  logging.info(f"Using GPU device {current_device} ({current_gpu_name}) for inference")
447
  except Exception as e:
448
- logging.warning(f"Failed to set GPU device {gpu_device_id}: {e}")
449
 
450
  input_data = {}
451
  input_data["prompt"] = img2vid_prompt
@@ -598,25 +611,30 @@ def run_graio_demo(args):
598
  # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/app.py
599
  @spaces.GPU(duration=360)
600
  def gpu_wrapped_generate_video(*args, **kwargs):
601
- # 在 worker 进程中确保使用初始化时记录的 GPU
602
- if gpu_device_id is not None:
603
- try:
604
- if torch.cuda.is_available():
605
- # 设置到初始化时记录的 GPU 设备
606
- torch.cuda.set_device(gpu_device_id)
 
 
 
607
  current_device = torch.cuda.current_device()
608
  current_gpu_name = torch.cuda.get_device_name(current_device)
609
- logging.info(f"Worker process using GPU device {current_device} ({current_gpu_name}) - matching initialization")
610
 
611
- # 验证 GPU 名称是否匹配(如果记录了名称)
612
- if gpu_name and current_gpu_name != gpu_name:
613
- logging.warning(f"GPU name mismatch: init={gpu_name}, worker={current_gpu_name}")
 
 
614
  else:
615
- logging.warning("GPU not available in worker process, but continuing...")
616
- except RuntimeError as e:
617
- logging.warning(f"GPU initialization error in worker process: {e}. Continuing anyway...")
618
- else:
619
- logging.info("No GPU device ID recorded, using default device")
620
 
621
  return generate_video(*args, **kwargs)
622
 
 
384
 
385
  os.makedirs(args.audio_save_dir, exist_ok=True)
386
 
387
+ # Stateless GPU 环境中,主进程不能初始化 CUDA
388
+ # GPU 检测和初始化将在 worker 进程中进行(通过 @spaces.GPU 装饰器)
389
+ # 在主进程中,我们使用 CPU 加载模型,然后在 worker 进程中移动到 GPU
390
  gpu_device_id = None
391
  gpu_name = None
392
  gpu_uuid = None
393
 
394
+ # 检查是否在 Stateless GPU 环境中(通过检查 SPACE_ID 和 spaces 模块)
395
+ is_stateless_gpu = os.environ.get("SPACE_ID") is not None
396
+
397
+ if is_stateless_gpu:
398
+ # Stateless GPU 环境:主进程不能初始化 CUDA,使用 CPU 加载模型
399
+ logging.info("Stateless GPU environment detected. Loading models on CPU in main process.")
400
+ logging.info("GPU will be initialized in worker process via @spaces.GPU decorator.")
401
+ device = -1 # 使用 CPU 加载模型
402
+ else:
403
+ # 本地环境:可以正常检测和使用 GPU
404
+ if torch.cuda.is_available():
405
+ try:
406
+ num_gpus = torch.cuda.device_count()
407
+ if num_gpus > 0:
408
+ gpu_device_id = local_rank if world_size > 1 else 0
409
+ torch.cuda.set_device(gpu_device_id)
410
+ gpu_name = torch.cuda.get_device_name(gpu_device_id)
411
+ # 尝试获取 GPU UUID(如果可用)
412
+ try:
413
+ gpu_uuid = torch.cuda.get_device_properties(gpu_device_id).uuid
414
+ except:
415
+ pass
416
+ logging.info(f"GPU AVAILABLE: {num_gpus} GPU(s), Device ID: {gpu_device_id}, Name: {gpu_name}, UUID: {gpu_uuid}")
417
+ device = gpu_device_id
418
+ else:
419
+ logging.warning("CUDA is available but no GPU devices found. Using CPU.")
420
+ device = -1 # 使用 CPU
421
+ except Exception as e:
422
+ logging.warning(f"GPU detection error: {e}. Using CPU.")
423
  device = -1 # 使用 CPU
424
+ else:
425
+ logging.warning("No CUDA-compatible GPU found. Using CPU (slower).")
426
  device = -1 # 使用 CPU
 
 
 
427
 
428
  logging.info("Creating AnyTalker pipeline.")
429
  # 加载模型
 
448
 
449
  def generate_video(img2vid_image, img2vid_prompt, n_prompt, img2vid_audio_1, img2vid_audio_2, img2vid_audio_3,
450
  sd_steps, seed, guide_scale, person_num_selector, audio_mode_selector):
451
+ # worker 进程中设置 GPU 设备(Stateless GPU 环境)
452
+ if torch.cuda.is_available():
453
  try:
454
+ # 如果记录了 GPU 设备 ID,使用它;否则使用默认设备 0
455
+ target_device = gpu_device_id if gpu_device_id is not None else 0
456
+ torch.cuda.set_device(target_device)
457
  current_device = torch.cuda.current_device()
458
  current_gpu_name = torch.cuda.get_device_name(current_device)
459
  logging.info(f"Using GPU device {current_device} ({current_gpu_name}) for inference")
460
  except Exception as e:
461
+ logging.warning(f"Failed to set GPU device: {e}")
462
 
463
  input_data = {}
464
  input_data["prompt"] = img2vid_prompt
 
611
  # 参考: https://huggingface.co/spaces/KlingTeam/LivePortrait/blob/main/app.py
612
  @spaces.GPU(duration=360)
613
  def gpu_wrapped_generate_video(*args, **kwargs):
614
+ # 在 worker 进程中初始化 GPU(Stateless GPU 环境)
615
+ worker_gpu_device_id = None
616
+ try:
617
+ if torch.cuda.is_available():
618
+ # worker 进程中检测 GPU
619
+ num_gpus = torch.cuda.device_count()
620
+ if num_gpus > 0:
621
+ worker_gpu_device_id = 0 # 使用第一个 GPU
622
+ torch.cuda.set_device(worker_gpu_device_id)
623
  current_device = torch.cuda.current_device()
624
  current_gpu_name = torch.cuda.get_device_name(current_device)
625
+ logging.info(f"Worker process initialized GPU: device {current_device} ({current_gpu_name})")
626
 
627
+ # 如果模型是在 CPU 上加载的,需要移动到 GPU
628
+ if device == -1:
629
+ logging.info("Moving models from CPU to GPU in worker process...")
630
+ # 注意:这里需要确保模型已经加载,并且可以移动到 GPU
631
+ # 由于模型是在主进程加载的,可能需要重新加载或移动
632
  else:
633
+ logging.warning("No GPU devices found in worker process")
634
+ else:
635
+ logging.warning("CUDA not available in worker process")
636
+ except RuntimeError as e:
637
+ logging.warning(f"GPU initialization error in worker process: {e}")
638
 
639
  return generate_video(*args, **kwargs)
640
 
wan/audio2video_multiID.py CHANGED
@@ -67,11 +67,11 @@ class WanAF2V:
67
  use_half (`bool`, *optional*, defaults to False):
68
  Whether to use half precision (float16/bfloat16) for model inference. Reduces memory usage.
69
  """
70
- # 如果 CUDA 不可用,自动回退到 CPU
71
- if torch.cuda.is_available():
72
- self.device = torch.device(f"cuda:{device_id}")
73
- else:
74
  self.device = torch.device("cpu")
 
 
75
  self.config = config
76
  self.rank = rank
77
  self.t5_cpu = t5_cpu
 
67
  use_half (`bool`, *optional*, defaults to False):
68
  Whether to use half precision (float16/bfloat16) for model inference. Reduces memory usage.
69
  """
70
+ # 如果 device_id 为 -1 或 CUDA 不可用,使用 CPU
71
+ if device_id == -1 or not torch.cuda.is_available():
 
 
72
  self.device = torch.device("cpu")
73
+ else:
74
+ self.device = torch.device(f"cuda:{device_id}")
75
  self.config = config
76
  self.rank = rank
77
  self.t5_cpu = t5_cpu