Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1030,9 +1030,6 @@ def generate_video(
|
|
| 1030 |
# SmolVLM2 — Auto-describe motion from reference video
|
| 1031 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 1032 |
SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
|
| 1033 |
-
# Pin to a revision known to work with transformers==4.57.6
|
| 1034 |
-
# (the main branch updated processor_config.json to reference a newer processor class)
|
| 1035 |
-
SMOLVLM_REVISION = "3444947b810d9efa1173515e44396d7710ba1042"
|
| 1036 |
_vlm_model = None
|
| 1037 |
_vlm_processor = None
|
| 1038 |
|
|
@@ -1058,20 +1055,22 @@ def _load_vlm():
|
|
| 1058 |
if _vlm_model is None:
|
| 1059 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 1060 |
|
| 1061 |
-
print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID}...")
|
| 1062 |
-
_vlm_processor = AutoProcessor.from_pretrained(
|
|
|
|
|
|
|
| 1063 |
try:
|
| 1064 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1065 |
SMOLVLM_MODEL_ID,
|
| 1066 |
-
revision=SMOLVLM_REVISION,
|
| 1067 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 1068 |
_attn_implementation="flash_attention_2",
|
| 1069 |
).to("cuda")
|
| 1070 |
except Exception:
|
| 1071 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1072 |
SMOLVLM_MODEL_ID,
|
| 1073 |
-
revision=SMOLVLM_REVISION,
|
| 1074 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 1075 |
).to("cuda")
|
| 1076 |
print("[SmolVLM] Model loaded!")
|
| 1077 |
return _vlm_model, _vlm_processor
|
|
|
|
| 1030 |
# SmolVLM2 — Auto-describe motion from reference video
|
| 1031 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 1032 |
SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
|
|
|
|
|
|
|
|
|
|
| 1033 |
_vlm_model = None
|
| 1034 |
_vlm_processor = None
|
| 1035 |
|
|
|
|
| 1055 |
if _vlm_model is None:
|
| 1056 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 1057 |
|
| 1058 |
+
print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID}...")
|
| 1059 |
+
_vlm_processor = AutoProcessor.from_pretrained(
|
| 1060 |
+
SMOLVLM_MODEL_ID, trust_remote_code=True
|
| 1061 |
+
)
|
| 1062 |
try:
|
| 1063 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1064 |
SMOLVLM_MODEL_ID,
|
|
|
|
| 1065 |
torch_dtype=torch.bfloat16,
|
| 1066 |
+
trust_remote_code=True,
|
| 1067 |
_attn_implementation="flash_attention_2",
|
| 1068 |
).to("cuda")
|
| 1069 |
except Exception:
|
| 1070 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1071 |
SMOLVLM_MODEL_ID,
|
|
|
|
| 1072 |
torch_dtype=torch.bfloat16,
|
| 1073 |
+
trust_remote_code=True,
|
| 1074 |
).to("cuda")
|
| 1075 |
print("[SmolVLM] Model loaded!")
|
| 1076 |
return _vlm_model, _vlm_processor
|