iljung1106 committed on
Commit ·
0f961a6
1
Parent(s): 39ec415
GPU lazy load
Browse files- app.py +4 -4
- inference_utils.py +87 -59
app.py
CHANGED
|
@@ -67,14 +67,14 @@ def main():
|
|
| 67 |
model_path, embeddings_path, yolo_path = download_files()
|
| 68 |
|
| 69 |
# Initialize App
|
| 70 |
-
#
|
| 71 |
-
#
|
| 72 |
print("Initializing Application...")
|
| 73 |
app = StyleEmbedderApp(
|
| 74 |
checkpoint_path=model_path,
|
| 75 |
embeddings_path=embeddings_path,
|
| 76 |
-
device='cuda',
|
| 77 |
-
detector_device='
|
| 78 |
yolo_weights=yolo_path
|
| 79 |
)
|
| 80 |
|
|
|
|
| 67 |
model_path, embeddings_path, yolo_path = download_files()
|
| 68 |
|
| 69 |
# Initialize App
|
| 70 |
+
# Model loading is lazy - happens inside @spaces.GPU decorated function
|
| 71 |
+
# This avoids CUDA initialization in main process
|
| 72 |
print("Initializing Application...")
|
| 73 |
app = StyleEmbedderApp(
|
| 74 |
checkpoint_path=model_path,
|
| 75 |
embeddings_path=embeddings_path,
|
| 76 |
+
device='cuda', # Will be used when model loads (inside @spaces.GPU context)
|
| 77 |
+
detector_device='cpu', # Always use CPU for detector to avoid CUDA init
|
| 78 |
yolo_weights=yolo_path
|
| 79 |
)
|
| 80 |
|
inference_utils.py
CHANGED
|
@@ -1,6 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import threading
|
| 2 |
from pathlib import Path
|
| 3 |
-
from typing import List, Optional, Tuple
|
| 4 |
import numpy as np
|
| 5 |
import torch
|
| 6 |
from PIL import Image
|
|
@@ -93,15 +97,11 @@ class FaceEyeExtractor:
|
|
| 93 |
import cv2
|
| 94 |
|
| 95 |
# Try to locate yolov5_anime if not strictly at yolo_dir
|
| 96 |
-
# But for now assume yolo_dir is correct or we need to look around
|
| 97 |
if not self.yolo_dir.exists():
|
| 98 |
# Fallback: check if it's in the current working directory
|
| 99 |
cwd_yolo = Path("yolov5_anime").resolve()
|
| 100 |
if cwd_yolo.exists():
|
| 101 |
self.yolo_dir = cwd_yolo
|
| 102 |
-
else:
|
| 103 |
-
# Just warning, let it fail later if critical or maybe it is installed in env
|
| 104 |
-
pass
|
| 105 |
|
| 106 |
if str(self.yolo_dir.resolve()) not in sys.path and self.yolo_dir.exists():
|
| 107 |
sys.path.insert(0, str(self.yolo_dir.resolve()))
|
|
@@ -114,8 +114,6 @@ class FaceEyeExtractor:
|
|
| 114 |
from models.experimental import attempt_load # type: ignore
|
| 115 |
from utils.torch_utils import select_device # type: ignore
|
| 116 |
except ImportError:
|
| 117 |
-
# If yolov5 is not in path, we can't do much.
|
| 118 |
-
# Assuming the user ensures yolov5_anime folder is present.
|
| 119 |
if not self.yolo_dir.exists():
|
| 120 |
raise RuntimeError(f"yolov5_anime dir not found: {self.yolo_dir}")
|
| 121 |
raise
|
|
@@ -129,7 +127,9 @@ class FaceEyeExtractor:
|
|
| 129 |
|
| 130 |
_torch.load = patched_load
|
| 131 |
try:
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
if not self.weights_path.exists():
|
| 134 |
raise RuntimeError(f"YOLO weights not found: {self.weights_path}")
|
| 135 |
self._yolo_model = attempt_load(str(self.weights_path), map_location=self._yolo_device)
|
|
@@ -425,7 +425,7 @@ class FaceEyeExtractor:
|
|
| 425 |
|
| 426 |
|
| 427 |
class StyleEmbedderApp:
|
| 428 |
-
"""Web UI 앱"""
|
| 429 |
|
| 430 |
def __init__(
|
| 431 |
self,
|
|
@@ -437,62 +437,86 @@ class StyleEmbedderApp:
|
|
| 437 |
eyes_cascade: Optional[str] = None,
|
| 438 |
detector_device: str = 'cpu',
|
| 439 |
):
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
# Face/Eye extractor (lazy load)
|
| 454 |
self._extractor = FaceEyeExtractor(
|
| 455 |
yolo_dir=_default_path('yolov5_anime') if yolo_dir is None else Path(yolo_dir),
|
| 456 |
weights_path=_default_path('yolov5x_anime.pt') if yolo_weights is None else Path(yolo_weights),
|
| 457 |
cascade_path=_default_path('anime-eyes-cascade.xml') if eyes_cascade is None else Path(eyes_cascade),
|
| 458 |
-
device=
|
| 459 |
-
)
|
| 460 |
-
|
| 461 |
-
# 모델 로드
|
| 462 |
-
print("Loading model...")
|
| 463 |
-
# Always load checkpoint on CPU to avoid duplicating large tensors on GPU.
|
| 464 |
-
checkpoint = torch.load(checkpoint_path, map_location='cpu')
|
| 465 |
-
config = get_config()
|
| 466 |
-
|
| 467 |
-
self.model = ArtistStyleModel(
|
| 468 |
-
num_classes=len(checkpoint['artist_to_idx']),
|
| 469 |
-
embedding_dim=config.model.embedding_dim,
|
| 470 |
-
hidden_dim=config.model.hidden_dim,
|
| 471 |
)
|
| 472 |
-
self.model.load_state_dict(checkpoint['model_state_dict'])
|
| 473 |
-
|
| 474 |
-
# Reduce VRAM: keep weights in FP16 on CUDA.
|
| 475 |
-
if self.device.type == 'cuda':
|
| 476 |
-
self.model = self.model.to(dtype=torch.float16)
|
| 477 |
-
self.model = self.model.to(self.device)
|
| 478 |
-
self.model.eval()
|
| 479 |
-
|
| 480 |
-
self.embedding_dim = config.model.embedding_dim
|
| 481 |
|
| 482 |
-
#
|
| 483 |
-
print("Loading embeddings...")
|
| 484 |
-
data = np.load(embeddings_path)
|
| 485 |
-
self.artist_names = data['artist_names'].tolist()
|
| 486 |
-
self.embeddings = data['embeddings']
|
| 487 |
-
print(f"Loaded {len(self.artist_names)} artist embeddings")
|
| 488 |
-
|
| 489 |
-
# Transform
|
| 490 |
self.transform = transforms.Compose([
|
| 491 |
transforms.Resize((224, 224)),
|
| 492 |
transforms.ToTensor(),
|
| 493 |
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
| 494 |
])
|
| 495 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 496 |
def preprocess_image(self, image: Optional[Image.Image]) -> Optional[torch.Tensor]:
|
| 497 |
"""이미지 전처리"""
|
| 498 |
if image is None:
|
|
@@ -523,7 +547,10 @@ class StyleEmbedderApp:
|
|
| 523 |
face_image: Optional[Image.Image] = None,
|
| 524 |
eye_image: Optional[Image.Image] = None,
|
| 525 |
) -> np.ndarray:
|
| 526 |
-
"""이미지에서 임베딩 추출"""
|
|
|
|
|
|
|
|
|
|
| 527 |
|
| 528 |
full_tensor = self.preprocess_image(full_image)
|
| 529 |
if full_tensor is None:
|
|
@@ -564,9 +591,7 @@ class StyleEmbedderApp:
|
|
| 564 |
has_eye = torch.tensor([False]).to(self.device)
|
| 565 |
|
| 566 |
with torch.cuda.amp.autocast(enabled=(self.device.type == 'cuda')):
|
| 567 |
-
|
| 568 |
-
# In ZeroGPU, the function runs on GPU.
|
| 569 |
-
embedding = self.model.get_embeddings(full, face, eye, has_face, has_eye)
|
| 570 |
|
| 571 |
# Keep output float32 for downstream numpy similarity math.
|
| 572 |
return embedding.squeeze(0).float().cpu().numpy()
|
|
@@ -577,12 +602,15 @@ class StyleEmbedderApp:
|
|
| 577 |
top_k: int = 10,
|
| 578 |
) -> List[Tuple[str, float]]:
|
| 579 |
"""유사 작가 검색"""
|
|
|
|
|
|
|
|
|
|
| 580 |
query_norm = query_embedding / np.linalg.norm(query_embedding)
|
| 581 |
-
embeddings_norm = self.
|
| 582 |
similarities = embeddings_norm @ query_norm
|
| 583 |
|
| 584 |
top_indices = np.argsort(similarities)[::-1][:top_k]
|
| 585 |
-
return [(self.
|
| 586 |
|
| 587 |
def extract_crops(self, full_image: Image.Image) -> Tuple[Optional[Image.Image], Optional[Image.Image], str]:
|
| 588 |
"""얼굴과 눈 자동 추출"""
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Three-View-Style-Embedder - Inference Utilities
|
| 3 |
+
Lazy loading for Hugging Face Spaces compatibility
|
| 4 |
+
"""
|
| 5 |
import threading
|
| 6 |
from pathlib import Path
|
| 7 |
+
from typing import List, Optional, Tuple
|
| 8 |
import numpy as np
|
| 9 |
import torch
|
| 10 |
from PIL import Image
|
|
|
|
| 97 |
import cv2
|
| 98 |
|
| 99 |
# Try to locate yolov5_anime if not strictly at yolo_dir
|
|
|
|
| 100 |
if not self.yolo_dir.exists():
|
| 101 |
# Fallback: check if it's in the current working directory
|
| 102 |
cwd_yolo = Path("yolov5_anime").resolve()
|
| 103 |
if cwd_yolo.exists():
|
| 104 |
self.yolo_dir = cwd_yolo
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
if str(self.yolo_dir.resolve()) not in sys.path and self.yolo_dir.exists():
|
| 107 |
sys.path.insert(0, str(self.yolo_dir.resolve()))
|
|
|
|
| 114 |
from models.experimental import attempt_load # type: ignore
|
| 115 |
from utils.torch_utils import select_device # type: ignore
|
| 116 |
except ImportError:
|
|
|
|
|
|
|
| 117 |
if not self.yolo_dir.exists():
|
| 118 |
raise RuntimeError(f"yolov5_anime dir not found: {self.yolo_dir}")
|
| 119 |
raise
|
|
|
|
| 127 |
|
| 128 |
_torch.load = patched_load
|
| 129 |
try:
|
| 130 |
+
# For Spaces, use CPU for detector to avoid CUDA init in main process
|
| 131 |
+
detector_device = 'cpu' if self.device.startswith('cuda') else self.device
|
| 132 |
+
self._yolo_device = select_device(detector_device)
|
| 133 |
if not self.weights_path.exists():
|
| 134 |
raise RuntimeError(f"YOLO weights not found: {self.weights_path}")
|
| 135 |
self._yolo_model = attempt_load(str(self.weights_path), map_location=self._yolo_device)
|
|
|
|
| 425 |
|
| 426 |
|
| 427 |
class StyleEmbedderApp:
|
| 428 |
+
"""Web UI 앱 - Lazy loading for Spaces compatibility"""
|
| 429 |
|
| 430 |
def __init__(
|
| 431 |
self,
|
|
|
|
| 437 |
eyes_cascade: Optional[str] = None,
|
| 438 |
detector_device: str = 'cpu',
|
| 439 |
):
|
| 440 |
+
# Store paths - don't load anything yet to avoid CUDA init in main process
|
| 441 |
+
self.checkpoint_path = checkpoint_path
|
| 442 |
+
self.embeddings_path = embeddings_path
|
| 443 |
+
self.requested_device = device
|
| 444 |
+
self.detector_device = detector_device
|
| 445 |
+
|
| 446 |
+
# Model will be loaded lazily in @spaces.GPU decorated function
|
| 447 |
+
self._model = None
|
| 448 |
+
self._model_lock = threading.RLock()
|
| 449 |
+
self._embeddings_loaded = False
|
| 450 |
+
self._artist_names = None
|
| 451 |
+
self._embeddings = None
|
| 452 |
+
|
| 453 |
+
# Face/Eye extractor (lazy load, uses CPU for detector to avoid CUDA init)
|
| 454 |
self._extractor = FaceEyeExtractor(
|
| 455 |
yolo_dir=_default_path('yolov5_anime') if yolo_dir is None else Path(yolo_dir),
|
| 456 |
weights_path=_default_path('yolov5x_anime.pt') if yolo_weights is None else Path(yolo_weights),
|
| 457 |
cascade_path=_default_path('anime-eyes-cascade.xml') if eyes_cascade is None else Path(eyes_cascade),
|
| 458 |
+
device='cpu', # Always use CPU for detector to avoid CUDA init
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
+
# Transform (no CUDA needed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
self.transform = transforms.Compose([
|
| 463 |
transforms.Resize((224, 224)),
|
| 464 |
transforms.ToTensor(),
|
| 465 |
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
|
| 466 |
])
|
| 467 |
|
| 468 |
+
def _ensure_model_loaded(self):
|
| 469 |
+
"""Lazy load model - only called inside @spaces.GPU decorated function"""
|
| 470 |
+
if self._model is not None:
|
| 471 |
+
return
|
| 472 |
+
|
| 473 |
+
with self._model_lock:
|
| 474 |
+
if self._model is not None:
|
| 475 |
+
return
|
| 476 |
+
|
| 477 |
+
print("Loading model (lazy)...")
|
| 478 |
+
# Load checkpoint on CPU first
|
| 479 |
+
checkpoint = torch.load(self.checkpoint_path, map_location='cpu')
|
| 480 |
+
config = get_config()
|
| 481 |
+
|
| 482 |
+
self._model = ArtistStyleModel(
|
| 483 |
+
num_classes=len(checkpoint['artist_to_idx']),
|
| 484 |
+
embedding_dim=config.model.embedding_dim,
|
| 485 |
+
hidden_dim=config.model.hidden_dim,
|
| 486 |
+
)
|
| 487 |
+
self._model.load_state_dict(checkpoint['model_state_dict'])
|
| 488 |
+
|
| 489 |
+
# Determine device - in @spaces.GPU context, CUDA should be available
|
| 490 |
+
if self.requested_device.startswith('cuda') and torch.cuda.is_available():
|
| 491 |
+
device = torch.device(self.requested_device)
|
| 492 |
+
# Reduce VRAM: keep weights in FP16 on CUDA
|
| 493 |
+
self._model = self._model.to(dtype=torch.float16)
|
| 494 |
+
else:
|
| 495 |
+
device = torch.device('cpu')
|
| 496 |
+
|
| 497 |
+
self._model = self._model.to(device)
|
| 498 |
+
self._model.eval()
|
| 499 |
+
self.device = device
|
| 500 |
+
self.embedding_dim = config.model.embedding_dim
|
| 501 |
+
|
| 502 |
+
print("Model loaded successfully")
|
| 503 |
+
|
| 504 |
+
def _ensure_embeddings_loaded(self):
|
| 505 |
+
"""Lazy load embeddings - no CUDA needed"""
|
| 506 |
+
if self._embeddings_loaded:
|
| 507 |
+
return
|
| 508 |
+
|
| 509 |
+
with self._model_lock:
|
| 510 |
+
if self._embeddings_loaded:
|
| 511 |
+
return
|
| 512 |
+
|
| 513 |
+
print("Loading embeddings...")
|
| 514 |
+
data = np.load(self.embeddings_path)
|
| 515 |
+
self._artist_names = data['artist_names'].tolist()
|
| 516 |
+
self._embeddings = data['embeddings']
|
| 517 |
+
self._embeddings_loaded = True
|
| 518 |
+
print(f"Loaded {len(self._artist_names)} artist embeddings")
|
| 519 |
+
|
| 520 |
def preprocess_image(self, image: Optional[Image.Image]) -> Optional[torch.Tensor]:
|
| 521 |
"""이미지 전처리"""
|
| 522 |
if image is None:
|
|
|
|
| 547 |
face_image: Optional[Image.Image] = None,
|
| 548 |
eye_image: Optional[Image.Image] = None,
|
| 549 |
) -> np.ndarray:
|
| 550 |
+
"""이미지에서 임베딩 추출 - GPU lazy loading"""
|
| 551 |
+
|
| 552 |
+
# Load model on first call (inside @spaces.GPU context)
|
| 553 |
+
self._ensure_model_loaded()
|
| 554 |
|
| 555 |
full_tensor = self.preprocess_image(full_image)
|
| 556 |
if full_tensor is None:
|
|
|
|
| 591 |
has_eye = torch.tensor([False]).to(self.device)
|
| 592 |
|
| 593 |
with torch.cuda.amp.autocast(enabled=(self.device.type == 'cuda')):
|
| 594 |
+
embedding = self._model.get_embeddings(full, face, eye, has_face, has_eye)
|
|
|
|
|
|
|
| 595 |
|
| 596 |
# Keep output float32 for downstream numpy similarity math.
|
| 597 |
return embedding.squeeze(0).float().cpu().numpy()
|
|
|
|
| 602 |
top_k: int = 10,
|
| 603 |
) -> List[Tuple[str, float]]:
|
| 604 |
"""유사 작가 검색"""
|
| 605 |
+
# Load embeddings if not loaded
|
| 606 |
+
self._ensure_embeddings_loaded()
|
| 607 |
+
|
| 608 |
query_norm = query_embedding / np.linalg.norm(query_embedding)
|
| 609 |
+
embeddings_norm = self._embeddings / np.linalg.norm(self._embeddings, axis=1, keepdims=True)
|
| 610 |
similarities = embeddings_norm @ query_norm
|
| 611 |
|
| 612 |
top_indices = np.argsort(similarities)[::-1][:top_k]
|
| 613 |
+
return [(self._artist_names[i], float(similarities[i])) for i in top_indices]
|
| 614 |
|
| 615 |
def extract_crops(self, full_image: Image.Image) -> Tuple[Optional[Image.Image], Optional[Image.Image], str]:
|
| 616 |
"""얼굴과 눈 자동 추출"""
|