Spaces:
Running on Zero
Running on Zero
Tianshuo-Xu committed on
Commit ·
5a8be65
1
Parent(s): 46e86e6
fix zerogpu cuda fork init
Browse files- .gitignore +3 -1
- app.py +9 -10
.gitignore
CHANGED
|
@@ -12,5 +12,7 @@ build/
|
|
| 12 |
*.pt
|
| 13 |
*.pth
|
| 14 |
*.ckpt
|
| 15 |
-
*.safetensors
|
| 16 |
*.log
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
*.pt
|
| 13 |
*.pth
|
| 14 |
*.ckpt
|
|
|
|
| 15 |
*.log
|
| 16 |
+
*safetensors
|
| 17 |
+
internvl_embedding
|
| 18 |
+
*.ttf
|
app.py
CHANGED
|
@@ -190,12 +190,6 @@ _cached_model_dir = preload_model_files()
|
|
| 190 |
print("="*50)
|
| 191 |
|
| 192 |
|
| 193 |
-
# ============================================================
|
| 194 |
-
# FP8 Quantization (works with FA3)
|
| 195 |
-
# ============================================================
|
| 196 |
-
from torchao.quantization import quantize_, Float8DynamicActivationFloat8WeightConfig
|
| 197 |
-
|
| 198 |
-
|
| 199 |
def init_generator():
|
| 200 |
"""Initialize the generator (without optimization - that's done separately)"""
|
| 201 |
global generator, _cached_model_dir, _preloaded_embedding, _preloaded_tokenizer
|
|
@@ -249,9 +243,9 @@ def parse_font_style(font_style: str) -> str:
|
|
| 249 |
return None
|
| 250 |
|
| 251 |
|
| 252 |
-
#
|
| 253 |
-
|
| 254 |
-
|
| 255 |
|
| 256 |
|
| 257 |
def _get_generation_duration(text, font, author, num_steps, start_seed, num_images):
|
|
@@ -277,7 +271,12 @@ def run_generation(text, font, author, num_steps, start_seed, num_images):
|
|
| 277 |
pass
|
| 278 |
|
| 279 |
# Step 1: Load model
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
gen = generator
|
| 282 |
# ZeroGPU automatically maps these to the acquired GPU during execution.
|
| 283 |
# We must also correctly update internal Python attributes so runtime-generated latents go to GPU.
|
|
|
|
| 190 |
print("="*50)
|
| 191 |
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
def init_generator():
|
| 194 |
"""Initialize the generator (without optimization - that's done separately)"""
|
| 195 |
global generator, _cached_model_dir, _preloaded_embedding, _preloaded_tokenizer
|
|
|
|
| 243 |
return None
|
| 244 |
|
| 245 |
|
| 246 |
+
# IMPORTANT:
|
| 247 |
+
# Do NOT initialize generator globally at import time in ZeroGPU Spaces.
|
| 248 |
+
# Keep it lazy inside the @spaces.GPU worker to avoid any pre-fork CUDA side effects.
|
| 249 |
|
| 250 |
|
| 251 |
def _get_generation_duration(text, font, author, num_steps, start_seed, num_images):
|
|
|
|
| 271 |
pass
|
| 272 |
|
| 273 |
# Step 1: Load model
|
| 274 |
+
global generator
|
| 275 |
+
if generator is None:
|
| 276 |
+
logger.info("Initializing generator lazily inside GPU worker...")
|
| 277 |
+
generator = init_generator()
|
| 278 |
+
|
| 279 |
+
logger.info("Using initialized generator in ZeroGPU worker.")
|
| 280 |
gen = generator
|
| 281 |
# ZeroGPU automatically maps these to the acquired GPU during execution.
|
| 282 |
# We must also correctly update internal Python attributes so runtime-generated latents go to GPU.
|