linoyts HF Staff committed on
Commit
fb8d538
·
verified ·
1 Parent(s): 1ac11cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -22
app.py CHANGED
@@ -1030,6 +1030,9 @@ def generate_video(
1030
  # SmolVLM2 — Auto-describe motion from reference video
1031
  # ─────────────────────────────────────────────────────────────────────────────
1032
  SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
 
 
 
1033
  _vlm_model = None
1034
  _vlm_processor = None
1035
 
@@ -1055,37 +1058,19 @@ def _load_vlm():
1055
  if _vlm_model is None:
1056
  from transformers import AutoProcessor, AutoModelForImageTextToText
1057
 
1058
- # Diagnostic: surface the real import error
1059
- try:
1060
- from transformers import SmolVLMProcessor
1061
- print(f"[SmolVLM] SmolVLMProcessor import OK: {SmolVLMProcessor}")
1062
- except ImportError as diag_e:
1063
- print(f"[SmolVLM] SmolVLMProcessor direct import failed: {diag_e}")
1064
- # Try to see what's actually missing
1065
- try:
1066
- import num2words
1067
- print(f"[SmolVLM] num2words OK: {num2words.__version__}")
1068
- except ImportError:
1069
- print("[SmolVLM] num2words is MISSING β€” installing now...")
1070
- subprocess.run([sys.executable, "-m", "pip", "install", "num2words"], check=True)
1071
- try:
1072
- import decord
1073
- print(f"[SmolVLM] decord OK")
1074
- except ImportError:
1075
- print("[SmolVLM] decord is MISSING β€” installing now...")
1076
- subprocess.run([sys.executable, "-m", "pip", "install", "decord"], check=True)
1077
-
1078
- print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID}...")
1079
- _vlm_processor = AutoProcessor.from_pretrained(SMOLVLM_MODEL_ID)
1080
  try:
1081
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1082
  SMOLVLM_MODEL_ID,
 
1083
  torch_dtype=torch.bfloat16,
1084
  _attn_implementation="flash_attention_2",
1085
  ).to("cuda")
1086
  except Exception:
1087
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1088
  SMOLVLM_MODEL_ID,
 
1089
  torch_dtype=torch.bfloat16,
1090
  ).to("cuda")
1091
  print("[SmolVLM] Model loaded!")
 
1030
  # SmolVLM2 — Auto-describe motion from reference video
1031
  # ─────────────────────────────────────────────────────────────────────────────
1032
  SMOLVLM_MODEL_ID = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
1033
+ # Pin to a revision known to work with transformers==4.57.6
1034
+ # (the main branch updated processor_config.json to reference a newer processor class)
1035
+ SMOLVLM_REVISION = "3444947b810d9efa1173515e44396d7710ba1042"
1036
  _vlm_model = None
1037
  _vlm_processor = None
1038
 
 
1058
  if _vlm_model is None:
1059
  from transformers import AutoProcessor, AutoModelForImageTextToText
1060
 
1061
+ print(f"[SmolVLM] Loading {SMOLVLM_MODEL_ID} (rev {SMOLVLM_REVISION[:8]})...")
1062
+ _vlm_processor = AutoProcessor.from_pretrained(SMOLVLM_MODEL_ID, revision=SMOLVLM_REVISION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1063
  try:
1064
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1065
  SMOLVLM_MODEL_ID,
1066
+ revision=SMOLVLM_REVISION,
1067
  torch_dtype=torch.bfloat16,
1068
  _attn_implementation="flash_attention_2",
1069
  ).to("cuda")
1070
  except Exception:
1071
  _vlm_model = AutoModelForImageTextToText.from_pretrained(
1072
  SMOLVLM_MODEL_ID,
1073
+ revision=SMOLVLM_REVISION,
1074
  torch_dtype=torch.bfloat16,
1075
  ).to("cuda")
1076
  print("[SmolVLM] Model loaded!")