iljung1106 committed on
Commit
0f961a6
·
1 Parent(s): 39ec415

GPU lazy load

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. inference_utils.py +87 -59
app.py CHANGED
@@ -67,14 +67,14 @@ def main():
67
  model_path, embeddings_path, yolo_path = download_files()
68
 
69
  # Initialize App
70
- # We use 'cuda' because this script is intended for ZeroGPU spaces.
71
- # The @spaces.GPU decorator in inference_utils handles the actual GPU allocation during inference.
72
  print("Initializing Application...")
73
  app = StyleEmbedderApp(
74
  checkpoint_path=model_path,
75
  embeddings_path=embeddings_path,
76
- device='cuda',
77
- detector_device='cuda', # Use GPU for detector too
78
  yolo_weights=yolo_path
79
  )
80
 
 
67
  model_path, embeddings_path, yolo_path = download_files()
68
 
69
  # Initialize App
70
+ # Model loading is lazy - happens inside @spaces.GPU decorated function
71
+ # This avoids CUDA initialization in main process
72
  print("Initializing Application...")
73
  app = StyleEmbedderApp(
74
  checkpoint_path=model_path,
75
  embeddings_path=embeddings_path,
76
+ device='cuda', # Will be used when model loads (inside @spaces.GPU context)
77
+ detector_device='cpu', # Always use CPU for detector to avoid CUDA init
78
  yolo_weights=yolo_path
79
  )
80
 
inference_utils.py CHANGED
@@ -1,6 +1,10 @@
 
 
 
 
1
  import threading
2
  from pathlib import Path
3
- from typing import List, Optional, Tuple, Dict, Any
4
  import numpy as np
5
  import torch
6
  from PIL import Image
@@ -93,15 +97,11 @@ class FaceEyeExtractor:
93
  import cv2
94
 
95
  # Try to locate yolov5_anime if not strictly at yolo_dir
96
- # But for now assume yolo_dir is correct or we need to look around
97
  if not self.yolo_dir.exists():
98
  # Fallback: check if it's in the current working directory
99
  cwd_yolo = Path("yolov5_anime").resolve()
100
  if cwd_yolo.exists():
101
  self.yolo_dir = cwd_yolo
102
- else:
103
- # Just warning, let it fail later if critical or maybe it is installed in env
104
- pass
105
 
106
  if str(self.yolo_dir.resolve()) not in sys.path and self.yolo_dir.exists():
107
  sys.path.insert(0, str(self.yolo_dir.resolve()))
@@ -114,8 +114,6 @@ class FaceEyeExtractor:
114
  from models.experimental import attempt_load # type: ignore
115
  from utils.torch_utils import select_device # type: ignore
116
  except ImportError:
117
- # If yolov5 is not in path, we can't do much.
118
- # Assuming the user ensures yolov5_anime folder is present.
119
  if not self.yolo_dir.exists():
120
  raise RuntimeError(f"yolov5_anime dir not found: {self.yolo_dir}")
121
  raise
@@ -129,7 +127,9 @@ class FaceEyeExtractor:
129
 
130
  _torch.load = patched_load
131
  try:
132
- self._yolo_device = select_device(self.device)
 
 
133
  if not self.weights_path.exists():
134
  raise RuntimeError(f"YOLO weights not found: {self.weights_path}")
135
  self._yolo_model = attempt_load(str(self.weights_path), map_location=self._yolo_device)
@@ -425,7 +425,7 @@ class FaceEyeExtractor:
425
 
426
 
427
  class StyleEmbedderApp:
428
- """Web UI 앱"""
429
 
430
  def __init__(
431
  self,
@@ -437,62 +437,86 @@ class StyleEmbedderApp:
437
  eyes_cascade: Optional[str] = None,
438
  detector_device: str = 'cpu',
439
  ):
440
- requested_device = device
441
- if requested_device.startswith('cuda') and not torch.cuda.is_available():
442
- # Soft fallback or raise? The original code raised error.
443
- # But in spaces, if GPU is not assigned yet (ZeroGPU), it might be tricky.
444
- # However, spaces handles CUDA availability inside the decorated function usually.
445
- # Here initialization happens.
446
- pass
447
-
448
- if torch.cuda.is_available():
449
- self.device = torch.device(requested_device)
450
- else:
451
- self.device = torch.device('cpu')
452
-
453
- # Face/Eye extractor (lazy load)
454
  self._extractor = FaceEyeExtractor(
455
  yolo_dir=_default_path('yolov5_anime') if yolo_dir is None else Path(yolo_dir),
456
  weights_path=_default_path('yolov5x_anime.pt') if yolo_weights is None else Path(yolo_weights),
457
  cascade_path=_default_path('anime-eyes-cascade.xml') if eyes_cascade is None else Path(eyes_cascade),
458
- device=detector_device,
459
- )
460
-
461
- # 모델 로드
462
- print("Loading model...")
463
- # Always load checkpoint on CPU to avoid duplicating large tensors on GPU.
464
- checkpoint = torch.load(checkpoint_path, map_location='cpu')
465
- config = get_config()
466
-
467
- self.model = ArtistStyleModel(
468
- num_classes=len(checkpoint['artist_to_idx']),
469
- embedding_dim=config.model.embedding_dim,
470
- hidden_dim=config.model.hidden_dim,
471
  )
472
- self.model.load_state_dict(checkpoint['model_state_dict'])
473
-
474
- # Reduce VRAM: keep weights in FP16 on CUDA.
475
- if self.device.type == 'cuda':
476
- self.model = self.model.to(dtype=torch.float16)
477
- self.model = self.model.to(self.device)
478
- self.model.eval()
479
-
480
- self.embedding_dim = config.model.embedding_dim
481
 
482
- # 임베딩 로드
483
- print("Loading embeddings...")
484
- data = np.load(embeddings_path)
485
- self.artist_names = data['artist_names'].tolist()
486
- self.embeddings = data['embeddings']
487
- print(f"Loaded {len(self.artist_names)} artist embeddings")
488
-
489
- # Transform
490
  self.transform = transforms.Compose([
491
  transforms.Resize((224, 224)),
492
  transforms.ToTensor(),
493
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
494
  ])
495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  def preprocess_image(self, image: Optional[Image.Image]) -> Optional[torch.Tensor]:
497
  """이미지 전처리"""
498
  if image is None:
@@ -523,7 +547,10 @@ class StyleEmbedderApp:
523
  face_image: Optional[Image.Image] = None,
524
  eye_image: Optional[Image.Image] = None,
525
  ) -> np.ndarray:
526
- """이미지에서 임베딩 추출"""
 
 
 
527
 
528
  full_tensor = self.preprocess_image(full_image)
529
  if full_tensor is None:
@@ -564,9 +591,7 @@ class StyleEmbedderApp:
564
  has_eye = torch.tensor([False]).to(self.device)
565
 
566
  with torch.cuda.amp.autocast(enabled=(self.device.type == 'cuda')):
567
- # Ensure model is on the correct device (ZeroGPU might handle this, but good to ensure)
568
- # In ZeroGPU, the function runs on GPU.
569
- embedding = self.model.get_embeddings(full, face, eye, has_face, has_eye)
570
 
571
  # Keep output float32 for downstream numpy similarity math.
572
  return embedding.squeeze(0).float().cpu().numpy()
@@ -577,12 +602,15 @@ class StyleEmbedderApp:
577
  top_k: int = 10,
578
  ) -> List[Tuple[str, float]]:
579
  """유사 작가 검색"""
 
 
 
580
  query_norm = query_embedding / np.linalg.norm(query_embedding)
581
- embeddings_norm = self.embeddings / np.linalg.norm(self.embeddings, axis=1, keepdims=True)
582
  similarities = embeddings_norm @ query_norm
583
 
584
  top_indices = np.argsort(similarities)[::-1][:top_k]
585
- return [(self.artist_names[i], float(similarities[i])) for i in top_indices]
586
 
587
  def extract_crops(self, full_image: Image.Image) -> Tuple[Optional[Image.Image], Optional[Image.Image], str]:
588
  """얼굴과 눈 자동 추출"""
 
1
+ """
2
+ Three-View-Style-Embedder - Inference Utilities
3
+ Lazy loading for Hugging Face Spaces compatibility
4
+ """
5
  import threading
6
  from pathlib import Path
7
+ from typing import List, Optional, Tuple
8
  import numpy as np
9
  import torch
10
  from PIL import Image
 
97
  import cv2
98
 
99
  # Try to locate yolov5_anime if not strictly at yolo_dir
 
100
  if not self.yolo_dir.exists():
101
  # Fallback: check if it's in the current working directory
102
  cwd_yolo = Path("yolov5_anime").resolve()
103
  if cwd_yolo.exists():
104
  self.yolo_dir = cwd_yolo
 
 
 
105
 
106
  if str(self.yolo_dir.resolve()) not in sys.path and self.yolo_dir.exists():
107
  sys.path.insert(0, str(self.yolo_dir.resolve()))
 
114
  from models.experimental import attempt_load # type: ignore
115
  from utils.torch_utils import select_device # type: ignore
116
  except ImportError:
 
 
117
  if not self.yolo_dir.exists():
118
  raise RuntimeError(f"yolov5_anime dir not found: {self.yolo_dir}")
119
  raise
 
127
 
128
  _torch.load = patched_load
129
  try:
130
+ # For Spaces, use CPU for detector to avoid CUDA init in main process
131
+ detector_device = 'cpu' if self.device.startswith('cuda') else self.device
132
+ self._yolo_device = select_device(detector_device)
133
  if not self.weights_path.exists():
134
  raise RuntimeError(f"YOLO weights not found: {self.weights_path}")
135
  self._yolo_model = attempt_load(str(self.weights_path), map_location=self._yolo_device)
 
425
 
426
 
427
  class StyleEmbedderApp:
428
+ """Web UI 앱 - Lazy loading for Spaces compatibility"""
429
 
430
  def __init__(
431
  self,
 
437
  eyes_cascade: Optional[str] = None,
438
  detector_device: str = 'cpu',
439
  ):
440
+ # Store paths - don't load anything yet to avoid CUDA init in main process
441
+ self.checkpoint_path = checkpoint_path
442
+ self.embeddings_path = embeddings_path
443
+ self.requested_device = device
444
+ self.detector_device = detector_device
445
+
446
+ # Model will be loaded lazily in @spaces.GPU decorated function
447
+ self._model = None
448
+ self._model_lock = threading.RLock()
449
+ self._embeddings_loaded = False
450
+ self._artist_names = None
451
+ self._embeddings = None
452
+
453
+ # Face/Eye extractor (lazy load, uses CPU for detector to avoid CUDA init)
454
  self._extractor = FaceEyeExtractor(
455
  yolo_dir=_default_path('yolov5_anime') if yolo_dir is None else Path(yolo_dir),
456
  weights_path=_default_path('yolov5x_anime.pt') if yolo_weights is None else Path(yolo_weights),
457
  cascade_path=_default_path('anime-eyes-cascade.xml') if eyes_cascade is None else Path(eyes_cascade),
458
+ device='cpu', # Always use CPU for detector to avoid CUDA init
 
 
 
 
 
 
 
 
 
 
 
 
459
  )
 
 
 
 
 
 
 
 
 
460
 
461
+ # Transform (no CUDA needed)
 
 
 
 
 
 
 
462
  self.transform = transforms.Compose([
463
  transforms.Resize((224, 224)),
464
  transforms.ToTensor(),
465
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
466
  ])
467
 
468
+ def _ensure_model_loaded(self):
469
+ """Lazy load model - only called inside @spaces.GPU decorated function"""
470
+ if self._model is not None:
471
+ return
472
+
473
+ with self._model_lock:
474
+ if self._model is not None:
475
+ return
476
+
477
+ print("Loading model (lazy)...")
478
+ # Load checkpoint on CPU first
479
+ checkpoint = torch.load(self.checkpoint_path, map_location='cpu')
480
+ config = get_config()
481
+
482
+ self._model = ArtistStyleModel(
483
+ num_classes=len(checkpoint['artist_to_idx']),
484
+ embedding_dim=config.model.embedding_dim,
485
+ hidden_dim=config.model.hidden_dim,
486
+ )
487
+ self._model.load_state_dict(checkpoint['model_state_dict'])
488
+
489
+ # Determine device - in @spaces.GPU context, CUDA should be available
490
+ if self.requested_device.startswith('cuda') and torch.cuda.is_available():
491
+ device = torch.device(self.requested_device)
492
+ # Reduce VRAM: keep weights in FP16 on CUDA
493
+ self._model = self._model.to(dtype=torch.float16)
494
+ else:
495
+ device = torch.device('cpu')
496
+
497
+ self._model = self._model.to(device)
498
+ self._model.eval()
499
+ self.device = device
500
+ self.embedding_dim = config.model.embedding_dim
501
+
502
+ print("Model loaded successfully")
503
+
504
+ def _ensure_embeddings_loaded(self):
505
+ """Lazy load embeddings - no CUDA needed"""
506
+ if self._embeddings_loaded:
507
+ return
508
+
509
+ with self._model_lock:
510
+ if self._embeddings_loaded:
511
+ return
512
+
513
+ print("Loading embeddings...")
514
+ data = np.load(self.embeddings_path)
515
+ self._artist_names = data['artist_names'].tolist()
516
+ self._embeddings = data['embeddings']
517
+ self._embeddings_loaded = True
518
+ print(f"Loaded {len(self._artist_names)} artist embeddings")
519
+
520
  def preprocess_image(self, image: Optional[Image.Image]) -> Optional[torch.Tensor]:
521
  """이미지 전처리"""
522
  if image is None:
 
547
  face_image: Optional[Image.Image] = None,
548
  eye_image: Optional[Image.Image] = None,
549
  ) -> np.ndarray:
550
+ """이미지에서 임베딩 추출 - GPU lazy loading"""
551
+
552
+ # Load model on first call (inside @spaces.GPU context)
553
+ self._ensure_model_loaded()
554
 
555
  full_tensor = self.preprocess_image(full_image)
556
  if full_tensor is None:
 
591
  has_eye = torch.tensor([False]).to(self.device)
592
 
593
  with torch.cuda.amp.autocast(enabled=(self.device.type == 'cuda')):
594
+ embedding = self._model.get_embeddings(full, face, eye, has_face, has_eye)
 
 
595
 
596
  # Keep output float32 for downstream numpy similarity math.
597
  return embedding.squeeze(0).float().cpu().numpy()
 
602
  top_k: int = 10,
603
  ) -> List[Tuple[str, float]]:
604
  """유사 작가 검색"""
605
+ # Load embeddings if not loaded
606
+ self._ensure_embeddings_loaded()
607
+
608
  query_norm = query_embedding / np.linalg.norm(query_embedding)
609
+ embeddings_norm = self._embeddings / np.linalg.norm(self._embeddings, axis=1, keepdims=True)
610
  similarities = embeddings_norm @ query_norm
611
 
612
  top_indices = np.argsort(similarities)[::-1][:top_k]
613
+ return [(self._artist_names[i], float(similarities[i])) for i in top_indices]
614
 
615
  def extract_crops(self, full_image: Image.Image) -> Tuple[Optional[Image.Image], Optional[Image.Image], str]:
616
  """얼굴과 눈 자동 추출"""