Txu647 committed on
Commit
c322e84
·
1 Parent(s): f36f495

Use lightweight internvl_embedding from TSXu/Unicalli_Pro

Browse files

- Download only internvl_embedding folder (~500MB) instead of full InternVL3 (~2GB)
- Faster loading and reduced memory usage

Files changed (1) hide show
  1. app.py +16 -1
app.py CHANGED
@@ -68,6 +68,9 @@ def init_generator():
68
  if generator is None:
69
  # Enable optimized SDPA attention backends for faster inference
70
  import torch
 
 
 
71
  try:
72
  torch.backends.cuda.enable_flash_sdp(True)
73
  torch.backends.cuda.enable_mem_efficient_sdp(True)
@@ -76,6 +79,18 @@ def init_generator():
76
  except Exception as e:
77
  print(f"Note: Could not configure SDPA backends: {e}")
78
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Lazy import to avoid CUDA initialization at module load time
80
  from inference import CalligraphyGenerator
81
 
@@ -83,7 +98,7 @@ def init_generator():
83
  model_name="flux-dev",
84
  device="cuda",
85
  offload=True, # Enable offload to save GPU memory
86
- intern_vlm_path="OpenGVLab/InternVL3-1B",
87
  checkpoint_path="TSXu/Unicalli_Pro", # Auto-download sharded model from root
88
  font_descriptions_path='dataset/chirography.json',
89
  author_descriptions_path='dataset/calligraphy_styles_en.json',
 
68
  if generator is None:
69
  # Enable optimized SDPA attention backends for faster inference
70
  import torch
71
+ import os
72
+ from huggingface_hub import snapshot_download
73
+
74
  try:
75
  torch.backends.cuda.enable_flash_sdp(True)
76
  torch.backends.cuda.enable_mem_efficient_sdp(True)
 
79
  except Exception as e:
80
  print(f"Note: Could not configure SDPA backends: {e}")
81
 
82
+ # Download internvl_embedding (lightweight, ~500MB) from TSXu/Unicalli_Pro
83
+ # This only loads the embedding layer, not the full InternVL3 model (~2GB)
84
+ hf_token = os.environ.get("HF_TOKEN", None)
85
+ print("Downloading internvl_embedding from TSXu/Unicalli_Pro...")
86
+ local_dir = snapshot_download(
87
+ repo_id="TSXu/Unicalli_Pro",
88
+ allow_patterns=["internvl_embedding/*"],
89
+ token=hf_token
90
+ )
91
+ intern_vlm_path = os.path.join(local_dir, "internvl_embedding")
92
+ print(f"Using lightweight embedding from: {intern_vlm_path}")
93
+
94
  # Lazy import to avoid CUDA initialization at module load time
95
  from inference import CalligraphyGenerator
96
 
 
98
  model_name="flux-dev",
99
  device="cuda",
100
  offload=True, # Enable offload to save GPU memory
101
+ intern_vlm_path=intern_vlm_path, # Lightweight embedding (~500MB vs ~2GB)
102
  checkpoint_path="TSXu/Unicalli_Pro", # Auto-download sharded model from root
103
  font_descriptions_path='dataset/chirography.json',
104
  author_descriptions_path='dataset/calligraphy_styles_en.json',