Spaces:
Running on Zero
Running on Zero
Use lightweight internvl_embedding from TSXu/Unicalli_Pro
Browse files
- Download only internvl_embedding folder (~500MB) instead of full InternVL3 (~2GB)
- Faster loading and reduced memory usage
app.py
CHANGED
|
@@ -68,6 +68,9 @@ def init_generator():
|
|
| 68 |
if generator is None:
|
| 69 |
# Enable optimized SDPA attention backends for faster inference
|
| 70 |
import torch
|
|
|
|
|
|
|
|
|
|
| 71 |
try:
|
| 72 |
torch.backends.cuda.enable_flash_sdp(True)
|
| 73 |
torch.backends.cuda.enable_mem_efficient_sdp(True)
|
|
@@ -76,6 +79,18 @@ def init_generator():
|
|
| 76 |
except Exception as e:
|
| 77 |
print(f"Note: Could not configure SDPA backends: {e}")
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
# Lazy import to avoid CUDA initialization at module load time
|
| 80 |
from inference import CalligraphyGenerator
|
| 81 |
|
|
@@ -83,7 +98,7 @@ def init_generator():
|
|
| 83 |
model_name="flux-dev",
|
| 84 |
device="cuda",
|
| 85 |
offload=True, # Enable offload to save GPU memory
|
| 86 |
-
intern_vlm_path=
|
| 87 |
checkpoint_path="TSXu/Unicalli_Pro", # Auto-download sharded model from root
|
| 88 |
font_descriptions_path='dataset/chirography.json',
|
| 89 |
author_descriptions_path='dataset/calligraphy_styles_en.json',
|
|
|
|
| 68 |
if generator is None:
|
| 69 |
# Enable optimized SDPA attention backends for faster inference
|
| 70 |
import torch
|
| 71 |
+
import os
|
| 72 |
+
from huggingface_hub import snapshot_download
|
| 73 |
+
|
| 74 |
try:
|
| 75 |
torch.backends.cuda.enable_flash_sdp(True)
|
| 76 |
torch.backends.cuda.enable_mem_efficient_sdp(True)
|
|
|
|
| 79 |
except Exception as e:
|
| 80 |
print(f"Note: Could not configure SDPA backends: {e}")
|
| 81 |
|
| 82 |
+
# Download internvl_embedding (lightweight, ~500MB) from TSXu/Unicalli_Pro
|
| 83 |
+
# This only loads the embedding layer, not the full InternVL3 model (~2GB)
|
| 84 |
+
hf_token = os.environ.get("HF_TOKEN", None)
|
| 85 |
+
print("Downloading internvl_embedding from TSXu/Unicalli_Pro...")
|
| 86 |
+
local_dir = snapshot_download(
|
| 87 |
+
repo_id="TSXu/Unicalli_Pro",
|
| 88 |
+
allow_patterns=["internvl_embedding/*"],
|
| 89 |
+
token=hf_token
|
| 90 |
+
)
|
| 91 |
+
intern_vlm_path = os.path.join(local_dir, "internvl_embedding")
|
| 92 |
+
print(f"Using lightweight embedding from: {intern_vlm_path}")
|
| 93 |
+
|
| 94 |
# Lazy import to avoid CUDA initialization at module load time
|
| 95 |
from inference import CalligraphyGenerator
|
| 96 |
|
|
|
|
| 98 |
model_name="flux-dev",
|
| 99 |
device="cuda",
|
| 100 |
offload=True, # Enable offload to save GPU memory
|
| 101 |
+
intern_vlm_path=intern_vlm_path, # Lightweight embedding (~500MB vs ~2GB)
|
| 102 |
checkpoint_path="TSXu/Unicalli_Pro", # Auto-download sharded model from root
|
| 103 |
font_descriptions_path='dataset/chirography.json',
|
| 104 |
author_descriptions_path='dataset/calligraphy_styles_en.json',
|