#!/usr/bin/env python3
"""End-to-end video generation benchmark for LTX-Video.

Compares performance between:
- Baseline (no optimizations)
- Custom CUDA kernels (optimized RMSNorm)
- torch.compile optimization

Usage:
    python benchmark_example.py --use-optimized-kernels
    python benchmark_example.py --no-optimized-kernels --compile
    python benchmark_example.py --use-optimized-kernels --compile  # Mutually exclusive!
"""
import argparse
import sys
import time

import torch


def parse_args() -> argparse.Namespace:
    """Parse command-line options for the benchmark.

    Returns:
        argparse.Namespace carrying the flags documented in each
        ``add_argument`` call below.
    """
    parser = argparse.ArgumentParser(description="LTX-Video Benchmark")
    # The two kernel-selection flags are contradictory, so let argparse
    # reject passing both instead of silently accepting an ambiguous
    # configuration (previously both could be set at once).
    kernel_group = parser.add_mutually_exclusive_group()
    kernel_group.add_argument("--use-optimized-kernels", action="store_true",
                              help="Use custom H100 CUDA kernels")
    kernel_group.add_argument("--no-optimized-kernels", action="store_true",
                              help="Use baseline implementation")
    parser.add_argument("--compile", action="store_true",
                        help="Enable torch.compile on transformer")
    parser.add_argument("--batch-size", type=int, default=1,
                        help="Number of videos per prompt")
    parser.add_argument("--num-frames", type=int, default=161,
                        help="Number of frames to generate")
    parser.add_argument("--height", type=int, default=512,
                        help="Video height in pixels")
    parser.add_argument("--width", type=int, default=768,
                        help="Video width in pixels")
    parser.add_argument("--steps", type=int, default=50,
                        help="Denoising steps")
    parser.add_argument("--warmup-iterations", type=int, default=2,
                        help="Warmup runs before benchmark")
    return parser.parse_args()


def main() -> None:
    """Validate the configuration, print it, and point at the full benchmark setup.

    Exits with status 1 when the flag combination is unsupported or when no
    CUDA device is available.
    """
    args = parse_args()

    # Custom kernels are stated to require PyTorch custom-op registration to
    # interoperate with torch.compile, so this combination is refused up front.
    if args.use_optimized_kernels and args.compile:
        print("WARNING: --use-optimized-kernels and --compile are mutually exclusive.")
        print("Custom kernels require PyTorch custom op registration to work with torch.compile.")
        sys.exit(1)

    print("=" * 60)
    print("LTX-Video Benchmark")
    print("=" * 60)
    print("Configuration:")
    print(f"  Optimized kernels: {args.use_optimized_kernels}")
    print(f"  torch.compile: {args.compile}")
    print(f"  Batch size: {args.batch_size}")
    print(f"  Frames: {args.num_frames}")
    print(f"  Resolution: {args.width}x{args.height}")
    print(f"  Steps: {args.steps}")
    print(f"  Warmup iterations: {args.warmup_iterations}")

    if not torch.cuda.is_available():
        print("\nCUDA not available. This benchmark requires a GPU.")
        sys.exit(1)

    print(f"\nDevice: {torch.cuda.get_device_name()}")
    print(f"Capability: {torch.cuda.get_device_capability()}")

    # NOTE: Full implementation requires diffusers and ltx_kernels
    print("\nThis is a reference benchmark script.")
    print("To run the full benchmark, install diffusers and build the CUDA kernels:")
    print("  pip install diffusers transformers accelerate")
    print("  cd examples/ltx_video && uv pip install -e .")


if __name__ == "__main__":
    main()