primerz committed on
Commit
345b083
·
verified ·
1 Parent(s): ae0aa20

Update models.py

Browse files
Files changed (1) hide show
  1. models.py +18 -15
models.py CHANGED
@@ -60,19 +60,20 @@ def download_model_with_retry(repo_id, filename, max_retries=None):
60
 
61
 
62
  def load_face_analysis():
63
- """Load face analysis model with proper error handling."""
64
- print("Loading face analysis model...")
65
  try:
 
66
  face_app = FaceAnalysis(
67
  name=FACE_DETECTION_CONFIG['model_name'],
68
  root='./models/insightface',
69
- providers=['CUDAExecutionProvider', 'CPUExecutionProvider']
70
  )
71
  face_app.prepare(
72
- ctx_id=FACE_DETECTION_CONFIG['ctx_id'],
73
  det_size=FACE_DETECTION_CONFIG['det_size']
74
  )
75
- print(" [OK] Face analysis model loaded successfully")
76
  return face_app, True
77
  except Exception as e:
78
  print(f" [WARNING] Face detection not available: {e}")
@@ -80,12 +81,13 @@ def load_face_analysis():
80
 
81
 
82
  def load_depth_detector():
83
- """Load Zoe Depth detector."""
84
  print("Loading Zoe Depth detector...")
85
  try:
86
  zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
87
- zoe_depth.to(device)
88
- print(" [OK] Zoe Depth loaded successfully")
 
89
  return zoe_depth, True
90
  except Exception as e:
91
  print(f" [WARNING] Zoe Depth not available: {e}")
@@ -212,6 +214,7 @@ def load_caption_model():
212
  """
213
  Load caption model with proper error handling.
214
  Tries multiple models in order of quality.
 
215
  """
216
  print("Loading caption model...")
217
 
@@ -223,9 +226,9 @@ def load_caption_model():
223
  caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
224
  caption_model = AutoModelForCausalLM.from_pretrained(
225
  "microsoft/git-large-coco",
226
- torch_dtype=dtype
227
- ).to(device)
228
- print(" [OK] GIT-Large model loaded")
229
  return caption_processor, caption_model, True, 'git'
230
  except Exception as e1:
231
  print(f" [INFO] GIT-Large not available: {e1}")
@@ -238,9 +241,9 @@ def load_caption_model():
238
  caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
239
  caption_model = BlipForConditionalGeneration.from_pretrained(
240
  "Salesforce/blip-image-captioning-base",
241
- torch_dtype=dtype
242
- ).to(device)
243
- print(" [OK] BLIP base model loaded")
244
  return caption_processor, caption_model, True, 'blip'
245
  except Exception as e2:
246
  print(f" [WARNING] Caption models not available: {e2}")
@@ -253,4 +256,4 @@ def set_clip_skip(pipe):
253
  print(f" [OK] CLIP skip set to {CLIP_SKIP}")
254
 
255
 
256
- print("[OK] Model loading functions ready")
 
60
 
61
 
62
  def load_face_analysis():
63
+ """Load face analysis model on CPU to save GPU memory."""
64
+ print("Loading face analysis model on CPU...")
65
  try:
66
+ # Force CPU execution for face analysis to save GPU memory
67
  face_app = FaceAnalysis(
68
  name=FACE_DETECTION_CONFIG['model_name'],
69
  root='./models/insightface',
70
+ providers=['CPUExecutionProvider'] # CPU only for face detection
71
  )
72
  face_app.prepare(
73
+ ctx_id=-1, # -1 for CPU
74
  det_size=FACE_DETECTION_CONFIG['det_size']
75
  )
76
+ print(" [OK] Face analysis model loaded on CPU (GPU memory saved)")
77
  return face_app, True
78
  except Exception as e:
79
  print(f" [WARNING] Face detection not available: {e}")
 
81
 
82
 
83
  def load_depth_detector():
84
+ """Load Zoe Depth detector with optimized memory management."""
85
  print("Loading Zoe Depth detector...")
86
  try:
87
  zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
88
+ # Start on CPU to save memory during initialization
89
+ zoe_depth = zoe_depth.to("cpu")
90
+ print(" [OK] Zoe Depth loaded (on CPU, will move to GPU when needed)")
91
  return zoe_depth, True
92
  except Exception as e:
93
  print(f" [WARNING] Zoe Depth not available: {e}")
 
214
  """
215
  Load caption model with proper error handling.
216
  Tries multiple models in order of quality.
217
+ Models start on CPU and move to GPU only when needed.
218
  """
219
  print("Loading caption model...")
220
 
 
226
  caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
227
  caption_model = AutoModelForCausalLM.from_pretrained(
228
  "microsoft/git-large-coco",
229
+ torch_dtype=dtype # Use dtype from config
230
+ ).to("cpu") # Start on CPU to save GPU memory
231
+ print(" [OK] GIT-Large model loaded (on CPU, will move to GPU when needed)")
232
  return caption_processor, caption_model, True, 'git'
233
  except Exception as e1:
234
  print(f" [INFO] GIT-Large not available: {e1}")
 
241
  caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
242
  caption_model = BlipForConditionalGeneration.from_pretrained(
243
  "Salesforce/blip-image-captioning-base",
244
+ torch_dtype=dtype # Use dtype from config
245
+ ).to("cpu") # Start on CPU to save GPU memory
246
+ print(" [OK] BLIP base model loaded (on CPU, will move to GPU when needed)")
247
  return caption_processor, caption_model, True, 'blip'
248
  except Exception as e2:
249
  print(f" [WARNING] Caption models not available: {e2}")
 
256
  print(f" [OK] CLIP skip set to {CLIP_SKIP}")
257
 
258
 
259
+ print("[OK] Model loading functions ready")