Spaces:
Running
Running
Upload utils/model_loader.py with huggingface_hub
Browse files- utils/model_loader.py +35 -31
utils/model_loader.py
CHANGED
|
@@ -104,54 +104,58 @@ class ModelManager:
|
|
| 104 |
)
|
| 105 |
return self.model_paths["wav2vec"]
|
| 106 |
|
| 107 |
-
def load_wan_model(self, size="infinitetalk-480", device="cuda"):
|
| 108 |
"""
|
| 109 |
-
Load Wan
|
| 110 |
|
| 111 |
Args:
|
| 112 |
-
size: Model size configuration
|
| 113 |
device: Device to load model on
|
|
|
|
| 114 |
|
| 115 |
Returns:
|
| 116 |
-
Loaded
|
| 117 |
"""
|
| 118 |
-
if "
|
| 119 |
import wan
|
| 120 |
-
from wan.configs import
|
| 121 |
|
| 122 |
model_path = self.get_wan_model_path()
|
| 123 |
infinitetalk_path = self.get_infinitetalk_weights_path()
|
|
|
|
| 124 |
|
| 125 |
-
logger.info(f"Loading
|
| 126 |
|
| 127 |
-
#
|
| 128 |
task = "infinitetalk-14B"
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
|
| 145 |
-
#
|
| 146 |
-
#
|
| 147 |
-
model = wan.WanModel(args)
|
| 148 |
-
model.to(device)
|
| 149 |
-
model.eval()
|
| 150 |
|
| 151 |
-
self.models["
|
| 152 |
-
logger.info("
|
| 153 |
|
| 154 |
-
return self.models["
|
| 155 |
|
| 156 |
def load_audio_encoder(self, device="cuda"):
|
| 157 |
"""
|
|
|
|
| 104 |
)
|
| 105 |
return self.model_paths["wav2vec"]
|
| 106 |
|
| 107 |
+
def load_wan_model(self, size="infinitetalk-480", device="cuda", offload_model=True):
    """
    Load (and cache) the Wan InfiniteTalk pipeline for inference.

    The pipeline is built once and stored in ``self.models["wan_pipeline"]``;
    later calls return that cached instance and ignore their arguments.

    Args:
        size: Model size configuration (infinitetalk-480 or infinitetalk-720).
            NOTE(review): not consulted in this method -- the task is fixed
            to "infinitetalk-14B"; confirm whether it should select a config.
        device: Device to load the model on. Either an integer GPU index or
            a device whose string form looks like "cuda" / "cuda:1". A device
            without an explicit index maps to GPU 0.
        offload_model: Whether to offload model to CPU between forwards.
            NOTE(review): accepted but not forwarded to the pipeline
            constructor here; presumably applied at generation time -- confirm.

    Returns:
        Loaded InfiniteTalkPipeline
    """
    if "wan_pipeline" not in self.models:
        # Imported lazily so the module can be loaded without `wan` installed.
        import wan
        from wan.configs import WAN_CONFIGS

        model_path = self.get_wan_model_path()
        infinitetalk_path = self.get_infinitetalk_weights_path()
        infinitetalk_weights = os.path.join(infinitetalk_path, "infinitetalk.safetensors")

        logger.info(f"Loading InfiniteTalk pipeline from {model_path}...")

        # Get configuration for infinitetalk-14B
        task = "infinitetalk-14B"
        cfg = WAN_CONFIGS[task]

        # The pipeline takes an integer GPU index. Honour an explicit index
        # ("cuda:1" -> 1) instead of silently sending every non-int device to
        # GPU 0; a bare "cuda" still resolves to 0 as before.
        if isinstance(device, int):
            device_id = device
        else:
            _, _, index_text = str(device).partition(":")
            device_id = int(index_text) if index_text.isdigit() else 0

        # Create InfiniteTalk pipeline
        # This matches the initialization in generate_infinitetalk.py
        pipeline = wan.InfiniteTalkPipeline(
            config=cfg,
            checkpoint_dir=model_path,
            quant_dir=None,  # No quantization for now
            device_id=device_id,
            rank=0,  # Single GPU
            t5_fsdp=False,
            dit_fsdp=False,
            use_usp=False,
            t5_cpu=False,
            lora_dir=None,
            lora_scales=None,
            quant=None,
            dit_path=None,
            infinitetalk_dir=infinitetalk_weights,
        )

        # Enable memory management for low VRAM if needed
        # pipeline.enable_vram_management(num_persistent_param_in_dit=0)

        self.models["wan_pipeline"] = pipeline
        logger.info("InfiniteTalk pipeline loaded successfully")

    return self.models["wan_pipeline"]
|
| 159 |
|
| 160 |
def load_audio_encoder(self, device="cuda"):
|
| 161 |
"""
|