Spaces:

gradientguild
/

SynthCXR

Running on Zero

App Files Community

gradientguild committed on 3 days ago

Commit

463e35b

verified ·

1 Parent(s): a4aa5c5

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

app.py +15 -13
requirements.txt +1 -0
synthcxr/constants.py +3 -3

app.py CHANGED Viewed

@@ -4,6 +4,8 @@
 from __future__ import annotations
 import os
 from pathlib import Path
 import spaces
@@ -37,17 +39,18 @@ CONDITION_CHOICES = [
 SEVERITY_CHOICES = ["(none)", "mild", "moderate", "severe"]
 # ---------------------------------------------------------------------------
-# Pipeline (lazy-loaded once)
 # ---------------------------------------------------------------------------
-_pipe = None
-def get_pipeline():
-    """Load the diffusion pipeline + LoRA weights into GPU memory (once)."""
-    global _pipe
-    if _pipe is not None:
-        return _pipe
     from synthcxr.pipeline import load_lora_weights, load_pipeline
     device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -58,26 +61,25 @@ def get_pipeline():
     vram_limit = float(vram_limit_str) if vram_limit_str else None
     print(f"[INFO] Loading QwenImagePipeline (device={device}, dtype={dtype}, vram_limit={vram_limit}) …")
-    _pipe = load_pipeline(device, dtype, vram_limit=vram_limit)
     # LORA_EPOCH env var: which epoch checkpoint to load (default: 2)
     lora_epoch = os.environ.get("LORA_EPOCH", "2")
     lora = LORA_DIR / f"epoch-{lora_epoch}.safetensors"
     if not lora.exists():
-        # Try step-based checkpoints or any available .safetensors
         candidates = sorted(LORA_DIR.glob("*.safetensors")) if LORA_DIR.exists() else []
         if candidates:
             lora = candidates[-1]
             print(f"[WARN] epoch-{lora_epoch} not found, falling back to {lora.name}")
         else:
             print("[WARN] No LoRA checkpoint found – running base model only.")
-            return _pipe
     print(f"[INFO] Loading LoRA from {lora}")
-    load_lora_weights(_pipe, lora)
     print("[INFO] Pipeline ready.")
-    return _pipe
 # ---------------------------------------------------------------------------
@@ -164,7 +166,7 @@ def generate_cxr(
     if mask_image is None:
         raise gr.Error("Please select or upload a mask first.")
-    pipe = get_pipeline()
     if pipe is None:
         raise gr.Error("Pipeline not loaded. GPU may be unavailable.")

 from __future__ import annotations
 import os
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 from pathlib import Path
 import spaces
 SEVERITY_CHOICES = ["(none)", "mild", "moderate", "severe"]
 # ---------------------------------------------------------------------------
+# Pipeline loading (fresh on each @spaces.GPU call; model files cached on disk)
 # ---------------------------------------------------------------------------
+def load_fresh_pipeline():
+    """Load the pipeline + LoRA onto the *currently allocated* GPU.
+    ZeroGPU deallocates GPU memory after each ``@spaces.GPU`` call, so we
+    cannot cache tensors between calls.  However, diffsynth caches the
+    model files on disk (HF Hub cache), so only tensor loading happens
+    here — not a full download.
+    """
     from synthcxr.pipeline import load_lora_weights, load_pipeline
     device = "cuda" if torch.cuda.is_available() else "cpu"
     vram_limit = float(vram_limit_str) if vram_limit_str else None
     print(f"[INFO] Loading QwenImagePipeline (device={device}, dtype={dtype}, vram_limit={vram_limit}) …")
+    pipe = load_pipeline(device, dtype, vram_limit=vram_limit)
     # LORA_EPOCH env var: which epoch checkpoint to load (default: 2)
     lora_epoch = os.environ.get("LORA_EPOCH", "2")
     lora = LORA_DIR / f"epoch-{lora_epoch}.safetensors"
     if not lora.exists():
         candidates = sorted(LORA_DIR.glob("*.safetensors")) if LORA_DIR.exists() else []
         if candidates:
             lora = candidates[-1]
             print(f"[WARN] epoch-{lora_epoch} not found, falling back to {lora.name}")
         else:
             print("[WARN] No LoRA checkpoint found – running base model only.")
+            return pipe
     print(f"[INFO] Loading LoRA from {lora}")
+    load_lora_weights(pipe, lora)
     print("[INFO] Pipeline ready.")
+    return pipe
 # ---------------------------------------------------------------------------
     if mask_image is None:
         raise gr.Error("Please select or upload a mask first.")
+    pipe = load_fresh_pipeline()
     if pipe is None:
         raise gr.Error("Pipeline not loaded. GPU may be unavailable.")

requirements.txt CHANGED Viewed

@@ -5,3 +5,4 @@ scipy
 Pillow
 numpy
 torch

 Pillow
 numpy
 torch
+hf_transfer

synthcxr/constants.py CHANGED Viewed

@@ -49,6 +49,6 @@ SEVERITY_MODIFIERS: dict[str, str] = {
     "significant": "significant",
 }
-DEFAULT_MODEL_ID = "Qwen/Qwen-Image-Edit-2511"
-TEXT_ENCODER_MODEL_ID = "Qwen/Qwen-Image"
-PROCESSOR_MODEL_ID = "Qwen/Qwen-Image-Edit"

     "significant": "significant",
 }
+DEFAULT_MODEL_ID = "gradientguild/SynthCXR-Qwen-Weights"
+TEXT_ENCODER_MODEL_ID = "gradientguild/SynthCXR-Qwen-Weights"
+PROCESSOR_MODEL_ID = "gradientguild/SynthCXR-Qwen-Weights"