Instructions to use ViTeX-Bench/ViTeX-Edit-14B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use ViTeX-Bench/ViTeX-Edit-14B with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("ViTeX-Bench/ViTeX-Edit-14B", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
| import av | |
| import numpy as np | |
| from io import BytesIO | |
| from .audio_video import write_video_audio as write_video_audio_ltx2 | |
def encode_single_frame(output_file: "str | BytesIO", image_array: np.ndarray, crf: float) -> None:
    """Encode one RGB image as a single-frame libx264 video in an MP4 container.

    Args:
        output_file: Destination passed straight to ``av.open``: a path, or a
            writable file-like object (``ltx2_preprocess`` passes a ``BytesIO``).
        image_array: Image indexed as ``[row, column, ...]`` and interpreted as
            ``rgb24`` by PyAV. An odd trailing row/column is dropped.
        crf: Value forwarded (as a string) to the libx264 ``crf`` option.

    Raises:
        ValueError: If the image is smaller than 2 pixels along either of its
            first two axes, so nothing would remain after cropping.
    """
    # Round DOWN to a multiple of 2 (cropping, not resizing) for compatibility
    # with video codecs; the frame is converted to yuv420p below.
    height = image_array.shape[0] // 2 * 2
    width = image_array.shape[1] // 2 * 2
    # Validate before opening the container so a bad input never leaves a
    # partially written output behind.
    if height == 0 or width == 0:
        raise ValueError(
            f"image must be at least 2x2 pixels to be encoded, got shape {image_array.shape}"
        )

    container = av.open(output_file, "w", format="mp4")
    try:
        stream = container.add_stream("libx264", rate=1, options={"crf": str(crf), "preset": "veryfast"})
        stream.height = height
        stream.width = width
        cropped = image_array[:height, :width]
        av_frame = av.VideoFrame.from_ndarray(cropped, format="rgb24").reformat(format="yuv420p")
        container.mux(stream.encode(av_frame))
        # Calling encode() with no frame flushes packets still buffered in the encoder.
        container.mux(stream.encode())
    finally:
        container.close()
def decode_single_frame(video_file: "str | BytesIO") -> np.ndarray:
    """Decode the first frame of the first video stream as an ``rgb24`` array.

    Args:
        video_file: Source passed straight to ``av.open``: a path, or a
            readable file-like object (``ltx2_preprocess`` passes a ``BytesIO``).

    Returns:
        The decoded frame converted with ``to_ndarray(format="rgb24")``.

    Raises:
        ValueError: If the input has no video stream, or the stream yields no
            frame.
    """
    container = av.open(video_file)
    try:
        # Use next(..., None) so a missing stream/frame surfaces as a clear
        # ValueError instead of a bare StopIteration leaking to the caller.
        stream = next((s for s in container.streams if s.type == "video"), None)
        if stream is None:
            raise ValueError("no video stream found in input")
        frame = next(container.decode(stream), None)
        if frame is None:
            raise ValueError("video stream contains no decodable frame")
    finally:
        container.close()
    return frame.to_ndarray(format="rgb24")
def ltx2_preprocess(image: np.ndarray, crf: float = 33) -> np.ndarray:
    """Round-trip an image through a single-frame libx264 encode/decode.

    The image is encoded in memory with ``encode_single_frame`` and read back
    with ``decode_single_frame``, so the result carries the codec's
    compression artifacts at the given quality setting.

    Args:
        image: Input image, handed to ``encode_single_frame`` unchanged.
        crf: libx264 ``crf`` value. ``0`` disables the round trip entirely.

    Returns:
        The same ``image`` object when ``crf == 0``; otherwise the decoded
        ``rgb24`` frame. Because the encoder crops to even dimensions, the
        result may be one row and/or column smaller than the input.
    """
    if crf == 0:
        return image

    # Encode into an in-memory buffer and grab the bytes before it is closed.
    with BytesIO() as encoded:
        encode_single_frame(encoded, image, crf)
        video_bytes = encoded.getvalue()

    # Decode from a fresh buffer positioned at the start of the encoded data.
    with BytesIO(video_bytes) as video_file:
        return decode_single_frame(video_file)