Spaces:

davanstrien
/

diffusiongemma-ocr-correction

Running on Zero

davanstrien HF Staff committed on 1 day ago

Commit

90d93b6

verified ·

1 Parent(s): 163634f

Upload app.py with huggingface_hub

Files changed (1) hide show

app.py CHANGED Viewed

@@ -38,20 +38,30 @@ MAX_INPUT_CHARS = 1200  # roughly the 220-token benchmark cap
 def model_path(volume_path: str, model_id: str) -> str:
-    """Prefer a mounted hf:// volume (see `hf spaces volumes`) over a download."""
     return volume_path if os.path.isdir(volume_path) else model_id
 DG_PATH = model_path("/models/dg", "google/diffusiongemma-26B-A4B-it")
 G4_PATH = model_path("/models/gemma", "google/gemma-4-E4B-it")
 print(f"loading DiffusionGemma from {DG_PATH} ...")
 dg_processor = AutoProcessor.from_pretrained(DG_PATH)
 dg_model = DiffusionGemmaForBlockDiffusion.from_pretrained(DG_PATH, dtype=torch.bfloat16).to("cuda")
 print(f"loading Gemma-4 from {G4_PATH} ...")
 g4_processor = AutoProcessor.from_pretrained(G4_PATH)
 g4_model = AutoModelForMultimodalLM.from_pretrained(G4_PATH, dtype=torch.bfloat16).to("cuda")
-print("models loaded")
 STOP_MARKERS = ("<turn|>", "<eos>", "<end_of_turn>", "<pad>")

 def model_path(volume_path: str, model_id: str) -> str:
+    """Prefer a mounted hf:// volume (see `hf spaces volumes`) over a download.
+    Volume reads go over FUSE, which is sometimes slower for safetensors loading
+    than a fresh download to local disk — set USE_VOLUMES=0 (Space variable) to
+    force from_pretrained downloads for comparison.
+    """
+    if os.environ.get("USE_VOLUMES", "1") == "0":
+        return model_id
     return volume_path if os.path.isdir(volume_path) else model_id
 DG_PATH = model_path("/models/dg", "google/diffusiongemma-26B-A4B-it")
 G4_PATH = model_path("/models/gemma", "google/gemma-4-E4B-it")
+t0 = time.perf_counter()
 print(f"loading DiffusionGemma from {DG_PATH} ...")
 dg_processor = AutoProcessor.from_pretrained(DG_PATH)
 dg_model = DiffusionGemmaForBlockDiffusion.from_pretrained(DG_PATH, dtype=torch.bfloat16).to("cuda")
+print(f"DiffusionGemma loaded in {time.perf_counter() - t0:.0f}s")
+t0 = time.perf_counter()
 print(f"loading Gemma-4 from {G4_PATH} ...")
 g4_processor = AutoProcessor.from_pretrained(G4_PATH)
 g4_model = AutoModelForMultimodalLM.from_pretrained(G4_PATH, dtype=torch.bfloat16).to("cuda")
+print(f"Gemma-4 loaded in {time.perf_counter() - t0:.0f}s")
 STOP_MARKERS = ("<turn|>", "<eos>", "<end_of_turn>", "<pad>")