Transformers: how to use LanguageBind/LanguageBind_Video with Transformers:
# --- Option 1: use a pipeline as a high-level helper ---
from transformers import pipeline

# Build a zero-shot image classification pipeline backed by the
# LanguageBind/LanguageBind_Video checkpoint.
pipe = pipeline(
    "zero-shot-image-classification",
    model="LanguageBind/LanguageBind_Video",
)

# Sample image and the label set the pipeline is asked to choose between.
image_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hub/parrots.png"
labels = ["animals", "humans", "landscape"]

# The call's return value is not stored; assign it to a name to inspect it.
pipe(image_url, candidate_labels=labels)

# --- Option 2: load the model directly ---
from transformers import AutoModelForZeroShotImageClassification

# dtype="auto" is forwarded to from_pretrained as-is.
model = AutoModelForZeroShotImageClassification.from_pretrained(
    "LanguageBind/LanguageBind_Video",
    dtype="auto",
)