Tianshuo-Xu committed on
Commit
5a8be65
·
1 Parent(s): 46e86e6

fix zerogpu cuda fork init

Browse files
Files changed (2) hide show
  1. .gitignore +3 -1
  2. app.py +9 -10
.gitignore CHANGED
@@ -12,5 +12,7 @@ build/
12
  *.pt
13
  *.pth
14
  *.ckpt
15
- *.safetensors
16
  *.log
 
 
 
 
12
  *.pt
13
  *.pth
14
  *.ckpt
 
15
  *.log
16
+ *safetensors
17
+ internvl_embedding
18
+ *.ttf
app.py CHANGED
@@ -190,12 +190,6 @@ _cached_model_dir = preload_model_files()
190
  print("="*50)
191
 
192
 
193
- # ============================================================
194
- # FP8 Quantization (works with FA3)
195
- # ============================================================
196
- from torchao.quantization import quantize_, Float8DynamicActivationFloat8WeightConfig
197
-
198
-
199
  def init_generator():
200
  """Initialize the generator (without optimization - that's done separately)"""
201
  global generator, _cached_model_dir, _preloaded_embedding, _preloaded_tokenizer
@@ -249,9 +243,9 @@ def parse_font_style(font_style: str) -> str:
249
  return None
250
 
251
 
252
- # Initialize the generator globally BEFORE zeroGPU functions so weights are memory-mapped
253
- logger.info("Initializing generator globally...")
254
- generator = init_generator()
255
 
256
 
257
  def _get_generation_duration(text, font, author, num_steps, start_seed, num_images):
@@ -277,7 +271,12 @@ def run_generation(text, font, author, num_steps, start_seed, num_images):
277
  pass
278
 
279
  # Step 1: Load model
280
- logger.info("Models are already globally initialized and managed by ZeroGPU.")
 
 
 
 
 
281
  gen = generator
282
  # ZeroGPU automatically maps these to the acquired GPU during execution.
283
  # We must also correctly update internal Python attributes so runtime-generated latents go to GPU.
 
190
  print("="*50)
191
 
192
 
 
 
 
 
 
 
193
  def init_generator():
194
  """Initialize the generator (without optimization - that's done separately)"""
195
  global generator, _cached_model_dir, _preloaded_embedding, _preloaded_tokenizer
 
243
  return None
244
 
245
 
246
+ # IMPORTANT:
247
+ # Do NOT initialize generator globally at import time in ZeroGPU Spaces.
248
+ # Keep it lazy inside the @spaces.GPU worker to avoid any pre-fork CUDA side effects.
249
 
250
 
251
  def _get_generation_duration(text, font, author, num_steps, start_seed, num_images):
 
271
  pass
272
 
273
  # Step 1: Load model
274
+ global generator
275
+ if generator is None:
276
+ logger.info("Initializing generator lazily inside GPU worker...")
277
+ generator = init_generator()
278
+
279
+ logger.info("Using initialized generator in ZeroGPU worker.")
280
  gen = generator
281
  # ZeroGPU automatically maps these to the acquired GPU during execution.
282
  # We must also correctly update internal Python attributes so runtime-generated latents go to GPU.