yitongl's picture
Upload FastVideo 5090 safetensors checkpoint2950
d4cc469 verified
# SPDX-License-Identifier: Apache-2.0
# Adapted from vllm: https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/envs.py
import os
from collections.abc import Callable
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
    # Static-only declarations: TYPE_CHECKING is False at runtime, so none of
    # these assignments ever execute. They give type checkers a name and a
    # type for every attribute that the module-level __getattr__ resolves
    # from `environment_variables`. The right-hand sides document the value
    # returned when the variable is unset (the cache/config roots
    # additionally honor XDG_CACHE_HOME / XDG_CONFIG_HOME).
    #
    # Keep this list in sync with the keys of `environment_variables`.
    FASTVIDEO_RINGBUFFER_WARNING_INTERVAL: int = 60
    FASTVIDEO_NCCL_SO_PATH: str | None = None
    LD_LIBRARY_PATH: str | None = None
    FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE: bool = True
    LOCAL_RANK: int = 0
    CUDA_VISIBLE_DEVICES: str | None = None
    FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S: int = 60
    FASTVIDEO_CACHE_ROOT: str = os.path.expanduser("~/.cache/fastvideo")
    FASTVIDEO_CONFIG_ROOT: str = os.path.expanduser("~/.config/fastvideo")
    FASTVIDEO_CONFIGURE_LOGGING: int = 1
    FASTVIDEO_RAY_PER_WORKER_GPUS: float = 1.0
    FASTVIDEO_LOGGING_LEVEL: str = "INFO"
    FASTVIDEO_LOGGING_PREFIX: str = ""
    FASTVIDEO_LOGGING_CONFIG_PATH: str | None = None
    FASTVIDEO_TRACE_FUNCTION: int = 0
    FASTVIDEO_ATTENTION_BACKEND: str | None = None
    FASTVIDEO_WORKER_MULTIPROC_METHOD: str = "spawn"
    FASTVIDEO_TARGET_DEVICE: str = "cuda"
    FASTVIDEO_USE_PRECOMPILED: bool = False
    MAX_JOBS: str | None = None
    NVCC_THREADS: str | None = None
    CMAKE_BUILD_TYPE: str | None = None
    VERBOSE: bool = False
    FASTVIDEO_TORCH_PROFILER_DIR: str | None = None
    FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES: bool = False
    FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY: bool = False
    FASTVIDEO_TORCH_PROFILER_WITH_STACK: bool = True
    FASTVIDEO_TORCH_PROFILER_WITH_FLOPS: bool = False
    FASTVIDEO_TORCH_PROFILER_WAIT_STEPS: int = 2
    FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS: int = 1
    FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS: int = 2
    FASTVIDEO_TORCH_PROFILE_REGIONS: str = ""
    FASTVIDEO_SERVER_DEV_MODE: bool = False
    FASTVIDEO_STAGE_LOGGING: bool = False
    FASTVIDEO_HOST_IP: str = ""
    FASTVIDEO_LOOPBACK_IP: str = ""
def get_default_cache_root() -> str:
    """Return the base directory under which per-user cache data lives.

    Returns:
        The value of ``XDG_CACHE_HOME`` when it is set to a non-empty
        string, otherwise ``~/.cache`` with ``~`` expanded.
    """
    # An empty XDG_CACHE_HOME is treated as unset (XDG Base Directory spec);
    # returning "" would make callers that join onto it produce a path
    # relative to the current working directory.
    return os.getenv("XDG_CACHE_HOME") or os.path.join(os.path.expanduser("~"), ".cache")
def get_default_config_root() -> str:
    """Return the base directory under which per-user configuration lives.

    Returns:
        The value of ``XDG_CONFIG_HOME`` when it is set to a non-empty
        string, otherwise ``~/.config`` with ``~`` expanded.
    """
    # An empty XDG_CONFIG_HOME is treated as unset (XDG Base Directory spec);
    # returning "" would make callers that join onto it produce a path
    # relative to the current working directory.
    return os.getenv("XDG_CONFIG_HOME") or os.path.join(os.path.expanduser("~"), ".config")
def maybe_convert_int(value: str | None) -> int | None:
if value is None:
return None
return int(value)
# The begin-* and end-* markers below are used by the documentation generator
# to extract the environment variables defined here.
# begin-env-vars-definition
environment_variables: dict[str, Callable[[], Any]] = {
    # Each value is a zero-argument callable, so a variable is read from the
    # process environment every time it is looked up rather than once at
    # import time.

    # ================== Installation Time Env Vars ==================

    # Device FastVideo is built for. Documented choices: cuda (the default),
    # rocm, neuron, cpu, openvino.
    "FASTVIDEO_TARGET_DEVICE": lambda: os.getenv("FASTVIDEO_TARGET_DEVICE", "cuda"),

    # Upper bound on parallel compilation jobs. None when unset; the build is
    # documented to fall back to the number of CPUs in that case.
    "MAX_JOBS": lambda: os.getenv("MAX_JOBS"),

    # Thread count handed to nvcc. None when unset (documented build default
    # is 1). When set, `MAX_JOBS` is reduced to avoid oversubscribing the CPU.
    "NVCC_THREADS": lambda: os.getenv("NVCC_THREADS"),

    # True when either FASTVIDEO_USE_PRECOMPILED or
    # FASTVIDEO_PRECOMPILED_WHEEL_LOCATION is set to any non-empty string;
    # fastvideo then uses precompiled binaries (*.so).
    "FASTVIDEO_USE_PRECOMPILED": lambda: bool(
        os.getenv("FASTVIDEO_USE_PRECOMPILED") or os.getenv("FASTVIDEO_PRECOMPILED_WHEEL_LOCATION")
    ),

    # CMake build type. None when unset, in which case the build picks
    # "Debug" or "RelWithDebInfo".
    # Available options: "Debug", "Release", "RelWithDebInfo"
    "CMAKE_BUILD_TYPE": lambda: os.getenv("CMAKE_BUILD_TYPE"),

    # Integer flag (0/1): print verbose logs during installation.
    "VERBOSE": lambda: int(os.getenv("VERBOSE", "0")) != 0,

    # Root directory for FastVideo configuration files.
    # Defaults to `~/.config/fastvideo` unless `XDG_CONFIG_HOME` is set.
    # This matters both at runtime (where configuration files are looked up)
    # and at **installation** time (where they are installed).
    "FASTVIDEO_CONFIG_ROOT": lambda: os.path.expanduser(
        os.getenv("FASTVIDEO_CONFIG_ROOT", os.path.join(get_default_config_root(), "fastvideo"))
    ),

    # ================== Runtime Env Vars ==================

    # Root directory for FastVideo cache files.
    # Defaults to `~/.cache/fastvideo` unless `XDG_CACHE_HOME` is set.
    "FASTVIDEO_CACHE_ROOT": lambda: os.path.expanduser(
        os.getenv("FASTVIDEO_CACHE_ROOT", os.path.join(get_default_cache_root(), "fastvideo"))
    ),

    # IP address of the current node in a distributed environment, for nodes
    # with several network interfaces. For multi-node inference, set this to
    # a different value on each node. Empty string when unset.
    "FASTVIDEO_HOST_IP": lambda: os.getenv("FASTVIDEO_HOST_IP", ""),

    # Forces the loopback IP to the given value. Empty string when unset.
    "FASTVIDEO_LOOPBACK_IP": lambda: os.getenv("FASTVIDEO_LOOPBACK_IP", ""),

    # GPUs per Ray worker. A fractional value lets Ray schedule several
    # actors on one GPU, so users can colocate other actors on the same GPUs
    # as FastVideo.
    "FASTVIDEO_RAY_PER_WORKER_GPUS": lambda: float(os.getenv("FASTVIDEO_RAY_PER_WORKER_GPUS", "1.0")),

    # Seconds between warning messages while the ring buffer is full.
    "FASTVIDEO_RINGBUFFER_WARNING_INTERVAL": lambda: int(os.getenv("FASTVIDEO_RINGBUFFER_WARNING_INTERVAL", "60")),

    # Path to the NCCL library file. Needed because nccl>=2.19 brought in by
    # PyTorch contains a bug: https://github.com/NVIDIA/nccl/issues/1234
    "FASTVIDEO_NCCL_SO_PATH": lambda: os.getenv("FASTVIDEO_NCCL_SO_PATH"),

    # Searched for the nccl library file when `FASTVIDEO_NCCL_SO_PATH` is
    # not set.
    "LD_LIBRARY_PATH": lambda: os.getenv("LD_LIBRARY_PATH"),

    # Internal flag for Dynamo fullgraph capture. On unless set to "0".
    "FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE": lambda: os.getenv("FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0",

    # Local rank of this process in the distributed setting; used to
    # determine the GPU device id.
    "LOCAL_RANK": lambda: int(os.getenv("LOCAL_RANK", "0")),

    # Controls which devices are visible in the distributed setting.
    "CUDA_VISIBLE_DEVICES": lambda: os.getenv("CUDA_VISIBLE_DEVICES"),

    # Timeout, in seconds, for each engine iteration.
    "FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S": lambda: int(os.getenv("FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S", "60")),

    # Logging configuration
    # 0: fastvideo does not configure logging.
    # 1: fastvideo configures logging with its default configuration, or with
    #    the file named by FASTVIDEO_LOGGING_CONFIG_PATH.
    "FASTVIDEO_CONFIGURE_LOGGING": lambda: int(os.getenv("FASTVIDEO_CONFIGURE_LOGGING", "1")),

    # Optional logging configuration file (see FASTVIDEO_CONFIGURE_LOGGING).
    "FASTVIDEO_LOGGING_CONFIG_PATH": lambda: os.getenv("FASTVIDEO_LOGGING_CONFIG_PATH"),

    # Default logging level.
    "FASTVIDEO_LOGGING_LEVEL": lambda: os.getenv("FASTVIDEO_LOGGING_LEVEL", "INFO"),

    # When set, this prefix is prepended to all log messages.
    "FASTVIDEO_LOGGING_PREFIX": lambda: os.getenv("FASTVIDEO_LOGGING_PREFIX", ""),

    # Trace function calls
    # Set to 1 to have fastvideo trace function calls; useful for debugging.
    "FASTVIDEO_TRACE_FUNCTION": lambda: int(os.getenv("FASTVIDEO_TRACE_FUNCTION", "0")),

    # Backend for attention computation
    # Available options:
    # - "TORCH_SDPA": PyTorch attention (earlier note said
    #   torch.nn.MultiheadAttention; the name suggests
    #   scaled_dot_product_attention -- TODO confirm)
    # - "FLASH_ATTN": use FlashAttention
    # - "VIDEO_SPARSE_ATTN": use Video Sparse Attention
    # - "SAGE_ATTN": use Sage Attention
    # - "SAGE_ATTN_THREE": use Sage Attention 3
    # - "ATTN_QAT_INFER": use the in-repo attn_qat_infer inference backend
    # - "ATTN_QAT_TRAIN": use the FastVideoKernel Triton attn_qat_train backend
    # - "SPARSE_FP4_COMPRESS_ATTN": FP4 sparse branch + VSA compress branch
    "FASTVIDEO_ATTENTION_BACKEND": lambda: os.getenv("FASTVIDEO_ATTENTION_BACKEND"),

    # Multiprocessing start method used for the dedicated worker context.
    "FASTVIDEO_WORKER_MULTIPROC_METHOD": lambda: os.getenv("FASTVIDEO_WORKER_MULTIPROC_METHOD", "spawn"),

    # Setting this enables the torch profiler; the value is the directory
    # where traces are saved. Note that it must be an absolute path (a
    # leading "~" is expanded here). None when unset.
    "FASTVIDEO_TORCH_PROFILER_DIR": lambda: (
        None
        if (profiler_dir := os.getenv("FASTVIDEO_TORCH_PROFILER_DIR")) is None
        else os.path.expanduser(profiler_dir)
    ),

    # FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES=1 makes the torch profiler
    # record shapes. Off when unset.
    "FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES": lambda: os.getenv("FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES", "0") != "0",

    # FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY=1 makes the torch profiler
    # profile memory. Off when unset.
    "FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY": lambda: (
        os.getenv("FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY", "0") != "0"
    ),

    # Stack profiling in the torch profiler. Unlike the other profiler
    # switches this is ON when unset; set
    # FASTVIDEO_TORCH_PROFILER_WITH_STACK=0 to turn it off.
    "FASTVIDEO_TORCH_PROFILER_WITH_STACK": lambda: os.getenv("FASTVIDEO_TORCH_PROFILER_WITH_STACK", "1") != "0",

    # FASTVIDEO_TORCH_PROFILER_WITH_FLOPS=1 makes the torch profiler profile
    # flops. Off when unset.
    "FASTVIDEO_TORCH_PROFILER_WITH_FLOPS": lambda: os.getenv("FASTVIDEO_TORCH_PROFILER_WITH_FLOPS", "0") != "0",

    # Wait steps per profiling cycle (torch.profiler.schedule wait parameter).
    # Defaults to 2 if not set.
    "FASTVIDEO_TORCH_PROFILER_WAIT_STEPS": lambda: int(os.getenv("FASTVIDEO_TORCH_PROFILER_WAIT_STEPS", "2")),

    # Warmup steps per profiling cycle (torch.profiler.schedule warmup
    # parameter). Defaults to 1 if not set.
    "FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS": lambda: int(os.getenv("FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS", "1")),

    # Active steps per profiling cycle (torch.profiler.schedule active
    # parameter). Defaults to 2 if not set.
    "FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS": lambda: int(os.getenv("FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS", "2")),

    # Raw string, empty when unset. NOTE(review): presumably selects which
    # regions are profiled; the expected format is not visible here.
    "FASTVIDEO_TORCH_PROFILE_REGIONS": lambda: os.getenv("FASTVIDEO_TORCH_PROFILE_REGIONS", ""),

    # Integer flag (0/1): run fastvideo in development mode, which enables
    # some additional endpoints for developing and debugging,
    # e.g. `/reset_prefix_cache`
    "FASTVIDEO_SERVER_DEV_MODE": lambda: int(os.getenv("FASTVIDEO_SERVER_DEV_MODE", "0")) != 0,

    # Integer flag (0/1): enable stage logging, which prints the time taken
    # for each stage.
    "FASTVIDEO_STAGE_LOGGING": lambda: int(os.getenv("FASTVIDEO_STAGE_LOGGING", "0")) != 0,
}
# end-env-vars-definition
def __getattr__(name: str):
    """Resolve a module attribute from ``environment_variables`` on access.

    Python calls this hook only for names that are not found as regular
    module attributes. The registered getter runs on every lookup, so the
    value reflects the environment at access time.

    Raises:
        AttributeError: if ``name`` is not a key of ``environment_variables``.
    """
    getter = environment_variables.get(name)
    if getter is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return getter()
def __dir__():
    """Return the names listed by ``dir()`` on this module: the keys of
    ``environment_variables``, in definition order."""
    return [*environment_variables]