Instructions to use Benjer/stable-video-diffusion-img2vid-xt with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use Benjer/stable-video-diffusion-img2vid-xt with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image, export_to_video

# Stable Video Diffusion (img2vid) is conditioned on an input IMAGE only —
# the pipeline does not accept a text prompt, so we use the SVD-specific
# pipeline class rather than the generic text-to-video template.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    "Benjer/stable-video-diffusion-img2vid-xt",
    torch_dtype=torch.float16,  # half precision to fit consumer GPUs
)
pipe.to("cuda")  # switch to "mps" for Apple devices

# Conditioning image; SVD-XT was trained at 1024x576 — TODO confirm the
# checkpoint's expected resolution before removing the resize.
image = load_image(
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png"
)
image = image.resize((1024, 576))

# decode_chunk_size trades peak VRAM for decode speed when turning latents
# into frames; lower it if you hit out-of-memory errors.
frames = pipe(image, decode_chunk_size=8).frames[0]
export_to_video(frames, "output.mp4", fps=7)
- Notebooks
- Google Colab
- Kaggle
{
  "_name_or_path": "/home/suraj_huggingface_co/.cache/huggingface/hub/models--diffusers--svd-xt/snapshots/9703ded20c957c340781ee710b75660826deb487/image_encoder",
  "architectures": [
    "CLIPVisionModelWithProjection"
  ],
  "attention_dropout": 0.0,
  "dropout": 0.0,
  "hidden_act": "gelu",
  "hidden_size": 1280,
  "image_size": 224,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 5120,
  "layer_norm_eps": 1e-05,
  "model_type": "clip_vision_model",
  "num_attention_heads": 16,
  "num_channels": 3,
  "num_hidden_layers": 32,
  "patch_size": 14,
  "projection_dim": 1024,
  "torch_dtype": "float16",
  "transformers_version": "4.34.0.dev0"
}