Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -1030,9 +1030,6 @@ def generate_video(
|
|
| 1030 |
# SmolVLM2 — Auto-describe motion from reference video
|
| 1031 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 1032 |
SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
|
| 1033 |
-
# Pin to a revision known to work with transformers==4.57.6
|
| 1034 |
-
# (the main branch updated processor_config.json to reference a newer processor class)
|
| 1035 |
-
SMOLVLM_REVISION = "3444947b810d9efa1173515e44396d7710ba1042"
|
| 1036 |
_vlm_model = None
|
| 1037 |
_vlm_processor = None
|
| 1038 |
|
|
@@ -1058,20 +1055,22 @@ def _load_vlm():
|
|
| 1058 |
if _vlm_model is None:
|
| 1059 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 1060 |
|
| 1061 |
-
print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID}...")
|
| 1062 |
-
_vlm_processor = AutoProcessor.from_pretrained(
|
|
|
|
|
|
|
| 1063 |
try:
|
| 1064 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1065 |
SMOLVLM_MODEL_ID,
|
| 1066 |
-
revision=SMOLVLM_REVISION,
|
| 1067 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 1068 |
_attn_implementation="flash_attention_2",
|
| 1069 |
).to("cuda")
|
| 1070 |
except Exception:
|
| 1071 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1072 |
SMOLVLM_MODEL_ID,
|
| 1073 |
-
revision=SMOLVLM_REVISION,
|
| 1074 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 1075 |
).to("cuda")
|
| 1076 |
print("[SmolVLM] Model loaded!")
|
| 1077 |
return _vlm_model, _vlm_processor
|
|
|
|
| 1030 |
# SmolVLM2 — Auto-describe motion from reference video
|
| 1031 |
# ─────────────────────────────────────────────────────────────────────────────
|
| 1032 |
SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
|
|
|
|
|
|
|
|
|
|
| 1033 |
_vlm_model = None
|
| 1034 |
_vlm_processor = None
|
| 1035 |
|
|
|
|
| 1055 |
if _vlm_model is None:
|
| 1056 |
from transformers import AutoProcessor, AutoModelForImageTextToText
|
| 1057 |
|
| 1058 |
+
print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID}...")
|
| 1059 |
+
_vlm_processor = AutoProcessor.from_pretrained(
|
| 1060 |
+
SMOLVLM_MODEL_ID, trust_remote_code=True
|
| 1061 |
+
)
|
| 1062 |
try:
|
| 1063 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1064 |
SMOLVLM_MODEL_ID,
|
|
|
|
| 1065 |
torch_dtype=torch.bfloat16,
|
| 1066 |
+
trust_remote_code=True,
|
| 1067 |
_attn_implementation="flash_attention_2",
|
| 1068 |
).to("cuda")
|
| 1069 |
except Exception:
|
| 1070 |
_vlm_model = AutoModelForImageTextToText.from_pretrained(
|
| 1071 |
SMOLVLM_MODEL_ID,
|
|
|
|
| 1072 |
torch_dtype=torch.bfloat16,
|
| 1073 |
+
trust_remote_code=True,
|
| 1074 |
).to("cuda")
|
| 1075 |
print("[SmolVLM] Model loaded!")
|
| 1076 |
return _vlm_model, _vlm_processor
|