Txu647 committed on
Commit
c322e84
·
1 Parent(s): f36f495

Use lightweight internvl_embedding from TSXu/Unicalli_Pro

Browse files

- Download only internvl_embedding folder (~500MB) instead of full InternVL3 (~2GB)
- Faster loading and reduced memory usage

Files changed (1) hide show
  1. app.py +16 -1
app.py CHANGED
@@ -68,6 +68,9 @@ def init_generator():
68
  if generator is None:
69
  # Enable optimized SDPA attention backends for faster inference
70
  import torch
 
 
 
71
  try:
72
  torch.backends.cuda.enable_flash_sdp(True)
73
  torch.backends.cuda.enable_mem_efficient_sdp(True)
@@ -76,6 +79,18 @@ def init_generator():
76
  except Exception as e:
77
  print(f"Note: Could not configure SDPA backends: {e}")
78
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  # Lazy import to avoid CUDA initialization at module load time
80
  from inference import CalligraphyGenerator
81
 
@@ -83,7 +98,7 @@ def init_generator():
83
  model_name="flux-dev",
84
  device="cuda",
85
  offload=True, # Enable offload to save GPU memory
86
- intern_vlm_path="OpenGVLab/InternVL3-1B",
87
  checkpoint_path="TSXu/Unicalli_Pro", # Auto-download sharded model from root
88
  font_descriptions_path='dataset/chirography.json',
89
  author_descriptions_path='dataset/calligraphy_styles_en.json',
 
68
  if generator is None:
69
  # Enable optimized SDPA attention backends for faster inference
70
  import torch
71
+ import os
72
+ from huggingface_hub import snapshot_download
73
+
74
  try:
75
  torch.backends.cuda.enable_flash_sdp(True)
76
  torch.backends.cuda.enable_mem_efficient_sdp(True)
 
79
  except Exception as e:
80
  print(f"Note: Could not configure SDPA backends: {e}")
81
 
82
+ # Download internvl_embedding (lightweight, ~500MB) from TSXu/Unicalli_Pro
83
+ # This only loads the embedding layer, not the full InternVL3 model (~2GB)
84
+ hf_token = os.environ.get("HF_TOKEN", None)
85
+ print("Downloading internvl_embedding from TSXu/Unicalli_Pro...")
86
+ local_dir = snapshot_download(
87
+ repo_id="TSXu/Unicalli_Pro",
88
+ allow_patterns=["internvl_embedding/*"],
89
+ token=hf_token
90
+ )
91
+ intern_vlm_path = os.path.join(local_dir, "internvl_embedding")
92
+ print(f"Using lightweight embedding from: {intern_vlm_path}")
93
+
94
  # Lazy import to avoid CUDA initialization at module load time
95
  from inference import CalligraphyGenerator
96
 
 
98
  model_name="flux-dev",
99
  device="cuda",
100
  offload=True, # Enable offload to save GPU memory
101
+ intern_vlm_path=intern_vlm_path, # Lightweight embedding (~500MB vs ~2GB)
102
  checkpoint_path="TSXu/Unicalli_Pro", # Auto-download sharded model from root
103
  font_descriptions_path='dataset/chirography.json',
104
  author_descriptions_path='dataset/calligraphy_styles_en.json',