Instructions to use yitongl/5090_test with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use yitongl/5090_test with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("yitongl/5090_test", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
| # SPDX-License-Identifier: Apache-2.0 | |
| # Adapted from vllm: https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/envs.py | |
| import os | |
| from collections.abc import Callable | |
| from typing import TYPE_CHECKING, Any | |
if TYPE_CHECKING:
    # Static-only declarations: this branch never executes at runtime, so none
    # of these names are bound as real module attributes. They exist to give
    # type checkers and IDEs a name and type for each environment variable;
    # the actual values are produced by the `environment_variables` getters
    # through the module-level `__getattr__`.
    #
    # Every key of `environment_variables` should have a declaration here.

    # --- installation time ---
    FASTVIDEO_TARGET_DEVICE: str = "cuda"
    MAX_JOBS: str | None = None
    NVCC_THREADS: str | None = None
    FASTVIDEO_USE_PRECOMPILED: bool = False
    CMAKE_BUILD_TYPE: str | None = None
    VERBOSE: bool = False
    FASTVIDEO_CONFIG_ROOT: str = os.path.expanduser("~/.config/fastvideo")

    # --- runtime: paths, networking, distributed ---
    FASTVIDEO_CACHE_ROOT: str = os.path.expanduser("~/.cache/fastvideo")
    FASTVIDEO_HOST_IP: str = ""
    FASTVIDEO_LOOPBACK_IP: str = ""
    FASTVIDEO_RAY_PER_WORKER_GPUS: float = 1.0
    FASTVIDEO_RINGBUFFER_WARNING_INTERVAL: int = 60
    FASTVIDEO_NCCL_SO_PATH: str | None = None
    LD_LIBRARY_PATH: str | None = None
    FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE: bool = True
    LOCAL_RANK: int = 0
    CUDA_VISIBLE_DEVICES: str | None = None
    FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S: int = 60

    # --- runtime: logging and tracing ---
    FASTVIDEO_CONFIGURE_LOGGING: int = 1
    FASTVIDEO_LOGGING_CONFIG_PATH: str | None = None
    FASTVIDEO_LOGGING_LEVEL: str = "INFO"
    FASTVIDEO_LOGGING_PREFIX: str = ""
    FASTVIDEO_TRACE_FUNCTION: int = 0

    # --- runtime: attention backend and workers ---
    FASTVIDEO_ATTENTION_BACKEND: str | None = None
    FASTVIDEO_WORKER_MULTIPROC_METHOD: str = "spawn"

    # --- runtime: torch profiler ---
    FASTVIDEO_TORCH_PROFILER_DIR: str | None = None
    FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES: bool = False
    FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY: bool = False
    FASTVIDEO_TORCH_PROFILER_WITH_STACK: bool = True
    FASTVIDEO_TORCH_PROFILER_WITH_FLOPS: bool = False
    FASTVIDEO_TORCH_PROFILER_WAIT_STEPS: int = 2
    FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS: int = 1
    FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS: int = 2
    FASTVIDEO_TORCH_PROFILE_REGIONS: str = ""

    # --- runtime: server and diagnostics ---
    FASTVIDEO_SERVER_DEV_MODE: bool = False
    FASTVIDEO_STAGE_LOGGING: bool = False
def get_default_cache_root() -> str:
    """Return the base directory for per-user cache files.

    Returns:
        The value of ``XDG_CACHE_HOME`` when it is set to a non-empty string,
        otherwise ``~/.cache`` with ``~`` expanded for the current user.
    """
    # `os.getenv(name, default)` only falls back when the variable is absent,
    # so an exported-but-empty XDG_CACHE_HOME would yield "" and turn every
    # derived cache path into a relative one. The XDG Base Directory spec
    # treats "empty" the same as "unset", hence the `or`.
    return os.getenv("XDG_CACHE_HOME") or os.path.join(
        os.path.expanduser("~"), ".cache")
def get_default_config_root() -> str:
    """Return the base directory for per-user configuration files.

    Returns:
        The value of ``XDG_CONFIG_HOME`` when it is set to a non-empty string,
        otherwise ``~/.config`` with ``~`` expanded for the current user.
    """
    # `os.getenv(name, default)` only falls back when the variable is absent,
    # so an exported-but-empty XDG_CONFIG_HOME would yield "" and turn every
    # derived config path into a relative one. The XDG Base Directory spec
    # treats "empty" the same as "unset", hence the `or`.
    return os.getenv("XDG_CONFIG_HOME") or os.path.join(
        os.path.expanduser("~"), ".config")
| def maybe_convert_int(value: str | None) -> int | None: | |
| if value is None: | |
| return None | |
| return int(value) | |
| # The begin-* and end* here are used by the documentation generator | |
| # to extract the used env vars. | |
| # begin-env-vars-definition | |
environment_variables: dict[str, Callable[[], Any]] = {
    # Each entry maps a variable name to a zero-argument getter, so the
    # process environment is read every time the value is requested rather
    # than once when this module is imported.

    # ================== Installation Time Env Vars ==================

    # Device FastVideo is built for. Supported: cuda (the default), rocm,
    # neuron, cpu, openvino.
    "FASTVIDEO_TARGET_DEVICE":
    lambda: os.environ.get("FASTVIDEO_TARGET_DEVICE", "cuda"),

    # Upper bound on parallel compilation jobs.
    # By default this is the number of CPUs.
    "MAX_JOBS":
    lambda: os.environ.get("MAX_JOBS"),

    # Thread count handed to nvcc; by default this is 1.
    # When set, `MAX_JOBS` is reduced to avoid oversubscribing the CPU.
    "NVCC_THREADS":
    lambda: os.environ.get("NVCC_THREADS"),

    # True when either FASTVIDEO_USE_PRECOMPILED or
    # FASTVIDEO_PRECOMPILED_WHEEL_LOCATION holds a non-empty value; fastvideo
    # then uses precompiled binaries (*.so).
    "FASTVIDEO_USE_PRECOMPILED":
    lambda: bool(
        os.environ.get("FASTVIDEO_USE_PRECOMPILED")
        or os.environ.get("FASTVIDEO_PRECOMPILED_WHEEL_LOCATION")),

    # CMake build type.
    # One of "Debug", "Release", "RelWithDebInfo"; when unset the build
    # defaults to "Debug" or "RelWithDebInfo".
    "CMAKE_BUILD_TYPE":
    lambda: os.environ.get("CMAKE_BUILD_TYPE"),

    # Non-zero integer => verbose logs during installation.
    "VERBOSE":
    lambda: int(os.environ.get("VERBOSE", "0")) != 0,

    # Root directory for FASTVIDEO configuration files.
    # Defaults to `~/.config/fastvideo` unless `XDG_CONFIG_HOME` is set.
    # This is consulted both when fastvideo looks up its configuration files
    # at runtime and when it installs them during **installation**.
    "FASTVIDEO_CONFIG_ROOT":
    lambda: os.path.expanduser(
        os.environ.get(
            "FASTVIDEO_CONFIG_ROOT",
            os.path.join(get_default_config_root(), "fastvideo"),
        )),

    # ================== Runtime Env Vars ==================

    # Root directory for FASTVIDEO cache files.
    # Defaults to `~/.cache/fastvideo` unless `XDG_CACHE_HOME` is set.
    "FASTVIDEO_CACHE_ROOT":
    lambda: os.path.expanduser(
        os.environ.get(
            "FASTVIDEO_CACHE_ROOT",
            os.path.join(get_default_cache_root(), "fastvideo"),
        )),

    # IP address of the current node in a distributed environment, for nodes
    # with several network interfaces. With multi-node inference, set this
    # to a different value on each node.
    "FASTVIDEO_HOST_IP":
    lambda: os.environ.get("FASTVIDEO_HOST_IP", ""),

    # Forces a specific loopback IP.
    "FASTVIDEO_LOOPBACK_IP":
    lambda: os.environ.get("FASTVIDEO_LOOPBACK_IP", ""),

    # GPUs per worker in Ray. A fractional value lets Ray schedule multiple
    # actors on a single GPU, so users can colocate other actors on the same
    # GPUs as FastVideo.
    "FASTVIDEO_RAY_PER_WORKER_GPUS":
    lambda: float(os.environ.get("FASTVIDEO_RAY_PER_WORKER_GPUS", "1.0")),

    # Seconds between warning messages while the ring buffer is full.
    "FASTVIDEO_RINGBUFFER_WARNING_INTERVAL":
    lambda: int(os.getenv("FASTVIDEO_RINGBUFFER_WARNING_INTERVAL", "60")),

    # Path to the NCCL library file. Needed because nccl>=2.19 brought
    # by PyTorch contains a bug: https://github.com/NVIDIA/nccl/issues/1234
    "FASTVIDEO_NCCL_SO_PATH":
    lambda: os.getenv("FASTVIDEO_NCCL_SO_PATH"),

    # When `FASTVIDEO_NCCL_SO_PATH` is not set, fastvideo tries to find the
    # nccl library file in the locations listed in `LD_LIBRARY_PATH`.
    "LD_LIBRARY_PATH":
    lambda: os.getenv("LD_LIBRARY_PATH"),

    # Internal flag for Dynamo fullgraph capture; on unless set to "0".
    "FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE":
    lambda: os.getenv("FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0",

    # Local rank of the process in the distributed setting; used to
    # determine the GPU device id.
    "LOCAL_RANK":
    lambda: int(os.getenv("LOCAL_RANK", "0")),

    # Controls the visible devices in the distributed setting.
    "CUDA_VISIBLE_DEVICES":
    lambda: os.getenv("CUDA_VISIBLE_DEVICES"),

    # Timeout, in seconds, for each iteration in the engine.
    "FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S":
    lambda: int(os.getenv("FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S", "60")),

    # Logging configuration.
    # 0: fastvideo does not configure logging.
    # 1: fastvideo configures logging with the default configuration, or
    #    with the file named by FASTVIDEO_LOGGING_CONFIG_PATH.
    "FASTVIDEO_CONFIGURE_LOGGING":
    lambda: int(os.environ.get("FASTVIDEO_CONFIGURE_LOGGING", "1")),
    "FASTVIDEO_LOGGING_CONFIG_PATH":
    lambda: os.environ.get("FASTVIDEO_LOGGING_CONFIG_PATH"),

    # Default logging level.
    "FASTVIDEO_LOGGING_LEVEL":
    lambda: os.environ.get("FASTVIDEO_LOGGING_LEVEL", "INFO"),

    # When set, this text is prepended to all log messages.
    "FASTVIDEO_LOGGING_PREFIX":
    lambda: os.environ.get("FASTVIDEO_LOGGING_PREFIX", ""),

    # Function-call tracing; set to 1 to enable. Useful for debugging.
    "FASTVIDEO_TRACE_FUNCTION":
    lambda: int(os.environ.get("FASTVIDEO_TRACE_FUNCTION", "0")),

    # Backend for attention computation.
    # Available options:
    # - "TORCH_SDPA": use torch.nn.MultiheadAttention
    # - "FLASH_ATTN": use FlashAttention
    # - "VIDEO_SPARSE_ATTN": use Video Sparse Attention
    # - "SAGE_ATTN": use Sage Attention
    # - "SAGE_ATTN_THREE": use Sage Attention 3
    # - "ATTN_QAT_INFER": use the in-repo attn_qat_infer inference backend
    # - "ATTN_QAT_TRAIN": use the FastVideoKernel Triton attn_qat_train backend
    # - "SPARSE_FP4_COMPRESS_ATTN": FP4 sparse branch + VSA compress branch
    "FASTVIDEO_ATTENTION_BACKEND":
    lambda: os.environ.get("FASTVIDEO_ATTENTION_BACKEND"),

    # Use dedicated multiprocess context for workers.
    "FASTVIDEO_WORKER_MULTIPROC_METHOD":
    lambda: os.environ.get("FASTVIDEO_WORKER_MULTIPROC_METHOD", "spawn"),

    # Setting this enables the torch profiler; the value is the directory
    # where traces are saved (it must be an absolute path). A leading `~` is
    # expanded. None when the variable is absent.
    "FASTVIDEO_TORCH_PROFILER_DIR":
    lambda: (os.path.expanduser(os.environ["FASTVIDEO_TORCH_PROFILER_DIR"])
             if "FASTVIDEO_TORCH_PROFILER_DIR" in os.environ else None),

    # FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES=1 makes the torch profiler
    # record shapes. Off when unset.
    "FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES":
    lambda: os.environ.get("FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES", "0") !=
    "0",

    # FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY=1 makes the torch profiler
    # profile memory. Off when unset.
    "FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY":
    lambda: os.environ.get("FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY", "0")
    != "0",

    # Stack profiling in the torch profiler. Unlike the other profiler
    # switches this one is ON when unset; set it to 0 to turn it off.
    "FASTVIDEO_TORCH_PROFILER_WITH_STACK":
    lambda: os.environ.get("FASTVIDEO_TORCH_PROFILER_WITH_STACK", "1") != "0",

    # FASTVIDEO_TORCH_PROFILER_WITH_FLOPS=1 makes the torch profiler profile
    # flops. Off when unset.
    "FASTVIDEO_TORCH_PROFILER_WITH_FLOPS":
    lambda: os.environ.get("FASTVIDEO_TORCH_PROFILER_WITH_FLOPS", "0") != "0",

    # Wait steps per profiling cycle (torch.profiler.schedule wait parameter).
    # Defaults to 2 if not set.
    "FASTVIDEO_TORCH_PROFILER_WAIT_STEPS":
    lambda: int(os.environ.get("FASTVIDEO_TORCH_PROFILER_WAIT_STEPS", "2")),

    # Warmup steps per profiling cycle (torch.profiler.schedule warmup
    # parameter). Defaults to 1 if not set.
    "FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS":
    lambda: int(os.environ.get("FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS", "1")),

    # Active steps per profiling cycle (torch.profiler.schedule active
    # parameter). Defaults to 2 if not set.
    "FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS":
    lambda: int(os.environ.get("FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS", "2")),
    "FASTVIDEO_TORCH_PROFILE_REGIONS":
    lambda: os.environ.get("FASTVIDEO_TORCH_PROFILE_REGIONS", ""),

    # Non-zero integer => development mode, which enables some additional
    # endpoints for developing and debugging, e.g. `/reset_prefix_cache`.
    "FASTVIDEO_SERVER_DEV_MODE":
    lambda: int(os.environ.get("FASTVIDEO_SERVER_DEV_MODE", "0")) != 0,

    # Non-zero integer => stage logging, which prints the time taken for
    # each stage.
    "FASTVIDEO_STAGE_LOGGING":
    lambda: int(os.environ.get("FASTVIDEO_STAGE_LOGGING", "0")) != 0,
}
| # end-env-vars-definition | |
def __getattr__(name: str):
    """Resolve a missing module attribute as an environment variable.

    Args:
        name: The attribute being looked up on this module.

    Returns:
        The result of calling the getter registered for ``name`` in
        ``environment_variables``; the getter runs on every lookup.

    Raises:
        AttributeError: If ``name`` is not a key of ``environment_variables``.
    """
    if name not in environment_variables:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    # Evaluate lazily: the value is computed at access time, not cached.
    getter = environment_variables[name]
    return getter()
def __dir__():
    """Return the names of all registered environment variables.

    Returns:
        A new list holding the keys of ``environment_variables`` in the
        order they are defined.
    """
    return list(environment_variables)