DawnC committed on
Commit
0ef6c1f
·
verified ·
1 Parent(s): 4cac9e5

Update inpainting_module.py

Browse files
Files changed (1) hide show
  1. inpainting_module.py +15 -36
inpainting_module.py CHANGED
@@ -923,7 +923,7 @@ class InpaintingModule:
923
  if is_spaces:
924
  # On Spaces, use minimal preview steps
925
  preview_steps = min(preview_steps, 8)
926
- logger.info(f"Spaces environment - using {preview_steps} preview steps")
927
 
928
  preview_result = self._generate_inpaint(
929
  image=image,
@@ -937,7 +937,7 @@ class InpaintingModule:
937
  generator=generator
938
  )
939
  else:
940
- logger.info("Spaces environment - skipping preview to fit 300s limit")
941
 
942
  if preview_only:
943
  generation_time = time.time() - start_time
@@ -968,44 +968,23 @@ class InpaintingModule:
968
 
969
  # Optimize for Hugging Face Spaces ZeroGPU (stateless, 300s hard limit)
970
  if is_spaces:
971
- # ZeroGPU timing breakdown (stateless architecture):
972
- # - First-time model loading: ~150-180s (download + CPU load + GPU transfer)
973
- # - Subsequent GPU allocation: ~40-60s (CPU to GPU transfer only)
974
- # - Inference: ~15s/step (slower on shared H200 vs dedicated L4)
975
- # - Platform limit: 300s hard limit (Pro tier, cannot be overridden)
976
  #
977
- # Strategy:
978
- # 1. Skip preview stage to maximize quality budget
979
- # 2. Detect if this is first load (just initialized pipeline)
980
- # 3. Use fewer steps on first load, more steps on subsequent requests
981
-
982
- # Check if this is likely the first request after model loading
983
- # (models just loaded will have generation_time 0)
984
- is_first_load = not hasattr(self, '_first_generation_done')
985
-
986
- if is_first_load:
987
- # First request after Space startup or model reload
988
- # Time budget: 300s - 180s (loading) - 20s (overhead) = 100s
989
- # Safe steps: 100s / 15s = 6-7 steps
990
- spaces_max_steps = 7 # Conservative for first-time loading
991
- logger.warning("⚠️ First inpainting request - using reduced steps to avoid timeout")
992
- logger.warning(f" Model loading + 7 steps ≈ 280s (safe)")
993
- logger.warning(f" 💡 Tip: Re-run for higher quality (11-12 steps, ~245s)")
994
- self._first_generation_done = True
995
- else:
996
- # Subsequent requests (model already in CPU memory)
997
- # Time budget: 300s - 60s (CPU→GPU) - 20s (overhead) = 220s
998
- # Safe steps: 220s / 15s = 14 steps (use 11-12 for safety margin)
999
- spaces_max_steps = 11 # Balanced quality for subsequent requests
1000
 
1001
  if num_steps > spaces_max_steps:
1002
- original_steps = num_steps
1003
  num_steps = spaces_max_steps
1004
- logger.info(f"Spaces ZeroGPU - reduced steps: {original_steps} {num_steps}")
1005
- if is_first_load:
1006
- logger.info(f" (First load mode: prioritizing completion over quality)")
1007
- else:
1008
- logger.info(f" (Subsequent request: ~{60 + num_steps * 15}s estimated)")
1009
 
1010
  full_result = self._generate_inpaint(
1011
  image=image,
 
923
  if is_spaces:
924
  # On Spaces, use minimal preview steps
925
  preview_steps = min(preview_steps, 8)
926
+ logger.debug(f"Spaces environment - using {preview_steps} preview steps")
927
 
928
  preview_result = self._generate_inpaint(
929
  image=image,
 
937
  generator=generator
938
  )
939
  else:
940
+ logger.debug("Spaces environment - skipping preview to fit 300s limit")
941
 
942
  if preview_only:
943
  generation_time = time.time() - start_time
 
968
 
969
  # Optimize for Hugging Face Spaces ZeroGPU (stateless, 300s hard limit)
970
  if is_spaces:
971
+ # ZeroGPU timing breakdown with model caching:
972
+ # - Model loading from cache: ~60s (cached models, CPU to GPU transfer)
973
+ # - Inference: ~15s/step (on shared H200)
974
+ # - Blending & overhead: ~20s
975
+ # - Platform limit: 300s hard limit (Pro tier)
976
  #
977
+ # Strategy with unified 12-step approach:
978
+ # - Skip preview completely (done above)
979
+ # - Use consistent 12 steps for quality and color accuracy
980
+ # - Time budget: 60s (load) + 180s (12 steps) + 20s (blend) = 260s
981
+ # - Safety margin: 40s buffer for platform variability
982
+
983
+ spaces_max_steps = 12 # Unified step count for all templates
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984
 
985
  if num_steps > spaces_max_steps:
 
986
  num_steps = spaces_max_steps
987
+ logger.debug(f"Spaces deployment: using {num_steps} steps (optimized for 300s limit)")
 
 
 
 
988
 
989
  full_result = self._generate_inpaint(
990
  image=image,