Spaces:
Running on Zero
Running on Zero
Commit ·
64f71ea
1
Parent(s): 26e8bca
Fix HunyuanFoley CLAP tokenizer overflow for long prompts
Browse files

The CLAP text encoder has a 512-token maximum, but encode_text_feat() called
the tokenizer without truncation=True/max_length, causing a tensor
shape mismatch when prompts exceed 512 tokens (523 > 512 error).
Added truncation=True, max_length=512 to the tokenizer call.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
HunyuanVideo-Foley/hunyuanvideo_foley/utils/feature_utils.py
CHANGED
|
@@ -129,7 +129,7 @@ def encode_video_features(video_path, model_dict):
|
|
@torch.inference_mode()
def encode_text_feat(text: List[str], model_dict):
    """Encode a batch of text prompts with the CLAP text encoder.

    Args:
        text: batch of prompt strings, shape (B,) — tokenized to (B, L).
        model_dict: bundle exposing ``clap_tokenizer``, ``clap_model`` and
            ``device`` (project type; structure inferred from usage here).

    Returns:
        Tuple of (last_hidden_state, attentions) from the CLAP model.
    """
    # truncation + max_length=512 are required: the CLAP text encoder has a
    # 512-token position limit, and an untruncated long prompt causes a
    # tensor shape mismatch inside the model forward pass.
    inputs = model_dict.clap_tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    ).to(model_dict.device)
    outputs = model_dict.clap_model(**inputs, output_hidden_states=True, return_dict=True)
    return outputs.last_hidden_state, outputs.attentions
|
|
|
|
@torch.inference_mode()
def encode_text_feat(text: List[str], model_dict):
    """Tokenize prompts and run them through the CLAP text encoder.

    Args:
        text: list of prompt strings; tokenization yields ids of shape (B, L).
        model_dict: bundle exposing ``clap_tokenizer``, ``clap_model`` and
            ``device`` (project type; structure inferred from usage here).

    Returns:
        Tuple of (last_hidden_state, attentions) from the CLAP forward pass.
    """
    tokenized = model_dict.clap_tokenizer(
        text,
        padding=True,
        truncation=True,  # CLAP text encoder caps input at 512 positions
        max_length=512,
        return_tensors="pt",
    )
    tokenized = tokenized.to(model_dict.device)
    encoded = model_dict.clap_model(**tokenized, output_hidden_states=True, return_dict=True)
    return encoded.last_hidden_state, encoded.attentions
|