iljung1106 committed on
Commit
0f961a6
·
1 Parent(s): 39ec415

GPU lazy load

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. inference_utils.py +87 -59
app.py CHANGED
@@ -67,14 +67,14 @@ def main():
67
  model_path, embeddings_path, yolo_path = download_files()
68
 
69
  # Initialize App
70
- # We use 'cuda' because this script is intended for ZeroGPU spaces.
71
- # The @spaces.GPU decorator in inference_utils handles the actual GPU allocation during inference.
72
  print("Initializing Application...")
73
  app = StyleEmbedderApp(
74
  checkpoint_path=model_path,
75
  embeddings_path=embeddings_path,
76
- device='cuda',
77
- detector_device='cuda', # Use GPU for detector too
78
  yolo_weights=yolo_path
79
  )
80
 
 
67
  model_path, embeddings_path, yolo_path = download_files()
68
 
69
  # Initialize App
70
+ # Model loading is lazy - happens inside @spaces.GPU decorated function
71
+ # This avoids CUDA initialization in main process
72
  print("Initializing Application...")
73
  app = StyleEmbedderApp(
74
  checkpoint_path=model_path,
75
  embeddings_path=embeddings_path,
76
+ device='cuda', # Will be used when model loads (inside @spaces.GPU context)
77
+ detector_device='cpu', # Always use CPU for detector to avoid CUDA init
78
  yolo_weights=yolo_path
79
  )
80
 
inference_utils.py CHANGED
@@ -1,6 +1,10 @@
 
 
 
 
1
  import threading
2
  from pathlib import Path
3
- from typing import List, Optional, Tuple, Dict, Any
4
  import numpy as np
5
  import torch
6
  from PIL import Image
@@ -93,15 +97,11 @@ class FaceEyeExtractor:
93
  import cv2
94
 
95
  # Try to locate yolov5_anime if not strictly at yolo_dir
96
- # But for now assume yolo_dir is correct or we need to look around
97
  if not self.yolo_dir.exists():
98
  # Fallback: check if it's in the current working directory
99
  cwd_yolo = Path("yolov5_anime").resolve()
100
  if cwd_yolo.exists():
101
  self.yolo_dir = cwd_yolo
102
- else:
103
- # Just warning, let it fail later if critical or maybe it is installed in env
104
- pass
105
 
106
  if str(self.yolo_dir.resolve()) not in sys.path and self.yolo_dir.exists():
107
  sys.path.insert(0, str(self.yolo_dir.resolve()))
@@ -114,8 +114,6 @@ class FaceEyeExtractor:
114
  from models.experimental import attempt_load # type: ignore
115
  from utils.torch_utils import select_device # type: ignore
116
  except ImportError:
117
- # If yolov5 is not in path, we can't do much.
118
- # Assuming the user ensures yolov5_anime folder is present.
119
  if not self.yolo_dir.exists():
120
  raise RuntimeError(f"yolov5_anime dir not found: {self.yolo_dir}")
121
  raise
@@ -129,7 +127,9 @@ class FaceEyeExtractor:
129
 
130
  _torch.load = patched_load
131
  try:
132
- self._yolo_device = select_device(self.device)
 
 
133
  if not self.weights_path.exists():
134
  raise RuntimeError(f"YOLO weights not found: {self.weights_path}")
135
  self._yolo_model = attempt_load(str(self.weights_path), map_location=self._yolo_device)
@@ -425,7 +425,7 @@ class FaceEyeExtractor:
425
 
426
 
427
  class StyleEmbedderApp:
428
- """Web UI 앱"""
429
 
430
  def __init__(
431
  self,
@@ -437,62 +437,86 @@ class StyleEmbedderApp:
437
  eyes_cascade: Optional[str] = None,
438
  detector_device: str = 'cpu',
439
  ):
440
- requested_device = device
441
- if requested_device.startswith('cuda') and not torch.cuda.is_available():
442
- # Soft fallback or raise? The original code raised error.
443
- # But in spaces, if GPU is not assigned yet (ZeroGPU), it might be tricky.
444
- # However, spaces handles CUDA availability inside the decorated function usually.
445
- # Here initialization happens.
446
- pass
447
-
448
- if torch.cuda.is_available():
449
- self.device = torch.device(requested_device)
450
- else:
451
- self.device = torch.device('cpu')
452
-
453
- # Face/Eye extractor (lazy load)
454
  self._extractor = FaceEyeExtractor(
455
  yolo_dir=_default_path('yolov5_anime') if yolo_dir is None else Path(yolo_dir),
456
  weights_path=_default_path('yolov5x_anime.pt') if yolo_weights is None else Path(yolo_weights),
457
  cascade_path=_default_path('anime-eyes-cascade.xml') if eyes_cascade is None else Path(eyes_cascade),
458
- device=detector_device,
459
- )
460
-
461
- # 모델 로드
462
- print("Loading model...")
463
- # Always load checkpoint on CPU to avoid duplicating large tensors on GPU.
464
- checkpoint = torch.load(checkpoint_path, map_location='cpu')
465
- config = get_config()
466
-
467
- self.model = ArtistStyleModel(
468
- num_classes=len(checkpoint['artist_to_idx']),
469
- embedding_dim=config.model.embedding_dim,
470
- hidden_dim=config.model.hidden_dim,
471
  )
472
- self.model.load_state_dict(checkpoint['model_state_dict'])
473
-
474
- # Reduce VRAM: keep weights in FP16 on CUDA.
475
- if self.device.type == 'cuda':
476
- self.model = self.model.to(dtype=torch.float16)
477
- self.model = self.model.to(self.device)
478
- self.model.eval()
479
-
480
- self.embedding_dim = config.model.embedding_dim
481
 
482
- # 임베딩 로드
483
- print("Loading embeddings...")
484
- data = np.load(embeddings_path)
485
- self.artist_names = data['artist_names'].tolist()
486
- self.embeddings = data['embeddings']
487
- print(f"Loaded {len(self.artist_names)} artist embeddings")
488
-
489
- # Transform
490
  self.transform = transforms.Compose([
491
  transforms.Resize((224, 224)),
492
  transforms.ToTensor(),
493
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
494
  ])
495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
496
  def preprocess_image(self, image: Optional[Image.Image]) -> Optional[torch.Tensor]:
497
  """이미지 전처리"""
498
  if image is None:
@@ -523,7 +547,10 @@ class StyleEmbedderApp:
523
  face_image: Optional[Image.Image] = None,
524
  eye_image: Optional[Image.Image] = None,
525
  ) -> np.ndarray:
526
- """이미지에서 임베딩 추출"""
 
 
 
527
 
528
  full_tensor = self.preprocess_image(full_image)
529
  if full_tensor is None:
@@ -564,9 +591,7 @@ class StyleEmbedderApp:
564
  has_eye = torch.tensor([False]).to(self.device)
565
 
566
  with torch.cuda.amp.autocast(enabled=(self.device.type == 'cuda')):
567
- # Ensure model is on the correct device (ZeroGPU might handle this, but good to ensure)
568
- # In ZeroGPU, the function runs on GPU.
569
- embedding = self.model.get_embeddings(full, face, eye, has_face, has_eye)
570
 
571
  # Keep output float32 for downstream numpy similarity math.
572
  return embedding.squeeze(0).float().cpu().numpy()
@@ -577,12 +602,15 @@ class StyleEmbedderApp:
577
  top_k: int = 10,
578
  ) -> List[Tuple[str, float]]:
579
  """유사 작가 검색"""
 
 
 
580
  query_norm = query_embedding / np.linalg.norm(query_embedding)
581
- embeddings_norm = self.embeddings / np.linalg.norm(self.embeddings, axis=1, keepdims=True)
582
  similarities = embeddings_norm @ query_norm
583
 
584
  top_indices = np.argsort(similarities)[::-1][:top_k]
585
- return [(self.artist_names[i], float(similarities[i])) for i in top_indices]
586
 
587
  def extract_crops(self, full_image: Image.Image) -> Tuple[Optional[Image.Image], Optional[Image.Image], str]:
588
  """얼굴과 눈 자동 추출"""
 
1
+ """
2
+ Three-View-Style-Embedder - Inference Utilities
3
+ Lazy loading for Hugging Face Spaces compatibility
4
+ """
5
  import threading
6
  from pathlib import Path
7
+ from typing import List, Optional, Tuple
8
  import numpy as np
9
  import torch
10
  from PIL import Image
 
97
  import cv2
98
 
99
  # Try to locate yolov5_anime if not strictly at yolo_dir
 
100
  if not self.yolo_dir.exists():
101
  # Fallback: check if it's in the current working directory
102
  cwd_yolo = Path("yolov5_anime").resolve()
103
  if cwd_yolo.exists():
104
  self.yolo_dir = cwd_yolo
 
 
 
105
 
106
  if str(self.yolo_dir.resolve()) not in sys.path and self.yolo_dir.exists():
107
  sys.path.insert(0, str(self.yolo_dir.resolve()))
 
114
  from models.experimental import attempt_load # type: ignore
115
  from utils.torch_utils import select_device # type: ignore
116
  except ImportError:
 
 
117
  if not self.yolo_dir.exists():
118
  raise RuntimeError(f"yolov5_anime dir not found: {self.yolo_dir}")
119
  raise
 
127
 
128
  _torch.load = patched_load
129
  try:
130
+ # For Spaces, use CPU for detector to avoid CUDA init in main process
131
+ detector_device = 'cpu' if self.device.startswith('cuda') else self.device
132
+ self._yolo_device = select_device(detector_device)
133
  if not self.weights_path.exists():
134
  raise RuntimeError(f"YOLO weights not found: {self.weights_path}")
135
  self._yolo_model = attempt_load(str(self.weights_path), map_location=self._yolo_device)
 
425
 
426
 
427
  class StyleEmbedderApp:
428
+ """Web UI 앱 - Lazy loading for Spaces compatibility"""
429
 
430
  def __init__(
431
  self,
 
437
  eyes_cascade: Optional[str] = None,
438
  detector_device: str = 'cpu',
439
  ):
440
+ # Store paths - don't load anything yet to avoid CUDA init in main process
441
+ self.checkpoint_path = checkpoint_path
442
+ self.embeddings_path = embeddings_path
443
+ self.requested_device = device
444
+ self.detector_device = detector_device
445
+
446
+ # Model will be loaded lazily in @spaces.GPU decorated function
447
+ self._model = None
448
+ self._model_lock = threading.RLock()
449
+ self._embeddings_loaded = False
450
+ self._artist_names = None
451
+ self._embeddings = None
452
+
453
+ # Face/Eye extractor (lazy load, uses CPU for detector to avoid CUDA init)
454
  self._extractor = FaceEyeExtractor(
455
  yolo_dir=_default_path('yolov5_anime') if yolo_dir is None else Path(yolo_dir),
456
  weights_path=_default_path('yolov5x_anime.pt') if yolo_weights is None else Path(yolo_weights),
457
  cascade_path=_default_path('anime-eyes-cascade.xml') if eyes_cascade is None else Path(eyes_cascade),
458
+ device='cpu', # Always use CPU for detector to avoid CUDA init
 
 
 
 
 
 
 
 
 
 
 
 
459
  )
 
 
 
 
 
 
 
 
 
460
 
461
+ # Transform (no CUDA needed)
 
 
 
 
 
 
 
462
  self.transform = transforms.Compose([
463
  transforms.Resize((224, 224)),
464
  transforms.ToTensor(),
465
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
466
  ])
467
 
468
+ def _ensure_model_loaded(self):
469
+ """Lazy load model - only called inside @spaces.GPU decorated function"""
470
+ if self._model is not None:
471
+ return
472
+
473
+ with self._model_lock:
474
+ if self._model is not None:
475
+ return
476
+
477
+ print("Loading model (lazy)...")
478
+ # Load checkpoint on CPU first
479
+ checkpoint = torch.load(self.checkpoint_path, map_location='cpu')
480
+ config = get_config()
481
+
482
+ self._model = ArtistStyleModel(
483
+ num_classes=len(checkpoint['artist_to_idx']),
484
+ embedding_dim=config.model.embedding_dim,
485
+ hidden_dim=config.model.hidden_dim,
486
+ )
487
+ self._model.load_state_dict(checkpoint['model_state_dict'])
488
+
489
+ # Determine device - in @spaces.GPU context, CUDA should be available
490
+ if self.requested_device.startswith('cuda') and torch.cuda.is_available():
491
+ device = torch.device(self.requested_device)
492
+ # Reduce VRAM: keep weights in FP16 on CUDA
493
+ self._model = self._model.to(dtype=torch.float16)
494
+ else:
495
+ device = torch.device('cpu')
496
+
497
+ self._model = self._model.to(device)
498
+ self._model.eval()
499
+ self.device = device
500
+ self.embedding_dim = config.model.embedding_dim
501
+
502
+ print("Model loaded successfully")
503
+
504
+ def _ensure_embeddings_loaded(self):
505
+ """Lazy load embeddings - no CUDA needed"""
506
+ if self._embeddings_loaded:
507
+ return
508
+
509
+ with self._model_lock:
510
+ if self._embeddings_loaded:
511
+ return
512
+
513
+ print("Loading embeddings...")
514
+ data = np.load(self.embeddings_path)
515
+ self._artist_names = data['artist_names'].tolist()
516
+ self._embeddings = data['embeddings']
517
+ self._embeddings_loaded = True
518
+ print(f"Loaded {len(self._artist_names)} artist embeddings")
519
+
520
  def preprocess_image(self, image: Optional[Image.Image]) -> Optional[torch.Tensor]:
521
  """이미지 전처리"""
522
  if image is None:
 
547
  face_image: Optional[Image.Image] = None,
548
  eye_image: Optional[Image.Image] = None,
549
  ) -> np.ndarray:
550
+ """이미지에서 임베딩 추출 - GPU lazy loading"""
551
+
552
+ # Load model on first call (inside @spaces.GPU context)
553
+ self._ensure_model_loaded()
554
 
555
  full_tensor = self.preprocess_image(full_image)
556
  if full_tensor is None:
 
591
  has_eye = torch.tensor([False]).to(self.device)
592
 
593
  with torch.cuda.amp.autocast(enabled=(self.device.type == 'cuda')):
594
+ embedding = self._model.get_embeddings(full, face, eye, has_face, has_eye)
 
 
595
 
596
  # Keep output float32 for downstream numpy similarity math.
597
  return embedding.squeeze(0).float().cpu().numpy()
 
602
  top_k: int = 10,
603
  ) -> List[Tuple[str, float]]:
604
  """유사 작가 검색"""
605
+ # Load embeddings if not loaded
606
+ self._ensure_embeddings_loaded()
607
+
608
  query_norm = query_embedding / np.linalg.norm(query_embedding)
609
+ embeddings_norm = self._embeddings / np.linalg.norm(self._embeddings, axis=1, keepdims=True)
610
  similarities = embeddings_norm @ query_norm
611
 
612
  top_indices = np.argsort(similarities)[::-1][:top_k]
613
+ return [(self._artist_names[i], float(similarities[i])) for i in top_indices]
614
 
615
  def extract_crops(self, full_image: Image.Image) -> Tuple[Optional[Image.Image], Optional[Image.Image], str]:
616
  """얼굴과 눈 자동 추출"""