linoyts HF Staff commited on
Commit
c879210
·
verified ·
1 Parent(s): fb8d538

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -1030,9 +1030,6 @@ def generate_video(
1030
  # SmolVLM2 — Auto-describe motion from reference video
1031
  # ─────────────────────────────────────────────────────────────────────────────
1032
  SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
1033
- # Pin to a revision known to work with transformers==4.57.6
1034
- # (the main branch updated processor_config.json to reference a newer processor class)
1035
- SMOLVLM_REVISION = "3444947b810d9efa1173515e44396d7710ba1042"
1036
  _vlm_model = None
1037
  _vlm_processor = None
1038
 
@@ -1058,20 +1055,22 @@ def _load_vlm():
1058
  if _vlm_model is None:
1059
  from transformers import AutoProcessor, AutoModelForImageTextToText
1060
 
1061
- print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID} (rev {SMOLVLM_REVISION[:8]})...")
1062
- _vlm_processor = AutoProcessor.from_pretrained(SMOLVLM_MODEL_ID, revision=SMOLVLM_REVISION)
 
 
1063
  try:
1064
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1065
  SMOLVLM_MODEL_ID,
1066
- revision=SMOLVLM_REVISION,
1067
  torch_dtype=torch.bfloat16,
 
1068
  _attn_implementation="flash_attention_2",
1069
  ).to("cuda")
1070
  except Exception:
1071
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1072
  SMOLVLM_MODEL_ID,
1073
- revision=SMOLVLM_REVISION,
1074
  torch_dtype=torch.bfloat16,
 
1075
  ).to("cuda")
1076
  print("[SmolVLM] Model loaded!")
1077
  return _vlm_model, _vlm_processor
 
1030
  # SmolVLM2 — Auto-describe motion from reference video
1031
  # ─────────────────────────────────────────────────────────────────────────────
1032
  SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
 
 
 
1033
  _vlm_model = None
1034
  _vlm_processor = None
1035
 
 
1055
  if _vlm_model is None:
1056
  from transformers import AutoProcessor, AutoModelForImageTextToText
1057
 
1058
+ print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID}...")
1059
+ _vlm_processor = AutoProcessor.from_pretrained(
1060
+ SMOLVLM_MODEL_ID, trust_remote_code=True
1061
+ )
1062
  try:
1063
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1064
  SMOLVLM_MODEL_ID,
 
1065
  torch_dtype=torch.bfloat16,
1066
+ trust_remote_code=True,
1067
  _attn_implementation="flash_attention_2",
1068
  ).to("cuda")
1069
  except Exception:
1070
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1071
  SMOLVLM_MODEL_ID,
 
1072
  torch_dtype=torch.bfloat16,
1073
+ trust_remote_code=True,
1074
  ).to("cuda")
1075
  print("[SmolVLM] Model loaded!")
1076
  return _vlm_model, _vlm_processor