# SPDX-License-Identifier: Apache-2.0
# Adapted from vllm: https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/envs.py

import os
from collections.abc import Callable
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    FASTVIDEO_RINGBUFFER_WARNING_INTERVAL: int = 60
    FASTVIDEO_NCCL_SO_PATH: str | None = None
    LD_LIBRARY_PATH: str | None = None
    LOCAL_RANK: int = 0
    CUDA_VISIBLE_DEVICES: str | None = None
    FASTVIDEO_CACHE_ROOT: str = os.path.expanduser("~/.cache/fastvideo")
    FASTVIDEO_CONFIG_ROOT: str = os.path.expanduser("~/.config/fastvideo")
    FASTVIDEO_CONFIGURE_LOGGING: int = 1
    FASTVIDEO_RAY_PER_WORKER_GPUS: float = 1.0
    FASTVIDEO_LOGGING_LEVEL: str = "INFO"
    FASTVIDEO_LOGGING_PREFIX: str = ""
    FASTVIDEO_LOGGING_CONFIG_PATH: str | None = None
    FASTVIDEO_TRACE_FUNCTION: int = 0
    FASTVIDEO_ATTENTION_BACKEND: str | None = None
    FASTVIDEO_WORKER_MULTIPROC_METHOD: str = "spawn"
    FASTVIDEO_TARGET_DEVICE: str = "cuda"
    MAX_JOBS: str | None = None
    NVCC_THREADS: str | None = None
    CMAKE_BUILD_TYPE: str | None = None
    VERBOSE: bool = False
    FASTVIDEO_TORCH_PROFILER_DIR: str | None = None
    FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES: bool = False
    FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY: bool = False
    FASTVIDEO_TORCH_PROFILER_WITH_STACK: bool = True
    FASTVIDEO_TORCH_PROFILER_WITH_FLOPS: bool = False
    FASTVIDEO_TORCH_PROFILER_WAIT_STEPS: int = 2
    FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS: int = 1
    FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS: int = 2
    FASTVIDEO_TORCH_PROFILE_REGIONS: str = ""
    FASTVIDEO_SERVER_DEV_MODE: bool = False
    FASTVIDEO_STAGE_LOGGING: bool = False
    FASTVIDEO_HOST_IP: str = ""
    FASTVIDEO_LOOPBACK_IP: str = ""


def get_default_cache_root() -> str:
    return os.getenv(
        "XDG_CACHE_HOME",
        os.path.join(os.path.expanduser("~"), ".cache"),
    )


def get_default_config_root() -> str:
    return os.getenv(
        "XDG_CONFIG_HOME",
        os.path.join(os.path.expanduser("~"), ".config"),
    )


def maybe_convert_int(value: str | None) -> int | None:
    if value is None:
        return None
    return int(value)


# The begin-* and end* here are used by the documentation generator
# to extract the used env vars.

# begin-env-vars-definition

environment_variables: dict[str, Callable[[], Any]] = {

    # ================== Installation Time Env Vars ==================

    # Target device of FastVideo, supporting [cuda (by default),
    # rocm, neuron, cpu, openvino]
    "FASTVIDEO_TARGET_DEVICE":
    lambda: os.getenv("FASTVIDEO_TARGET_DEVICE", "cuda"),

    # Maximum number of compilation jobs to run in parallel.
    # By default this is the number of CPUs
    "MAX_JOBS":
    lambda: os.getenv("MAX_JOBS", None),

    # Number of threads to use for nvcc
    # By default this is 1.
    # If set, `MAX_JOBS` will be reduced to avoid oversubscribing the CPU.
    "NVCC_THREADS":
    lambda: os.getenv("NVCC_THREADS", None),

    # If set, fastvideo will use precompiled binaries (*.so)
    "FASTVIDEO_USE_PRECOMPILED":
    lambda: bool(os.environ.get("FASTVIDEO_USE_PRECOMPILED")) or bool(
        os.environ.get("FASTVIDEO_PRECOMPILED_WHEEL_LOCATION")),

    # CMake build type
    # If not set, defaults to "Debug" or "RelWithDebInfo"
    # Available options: "Debug", "Release", "RelWithDebInfo"
    "CMAKE_BUILD_TYPE":
    lambda: os.getenv("CMAKE_BUILD_TYPE"),

    # If set, fastvideo will print verbose logs during installation
    "VERBOSE":
    lambda: bool(int(os.getenv('VERBOSE', '0'))),

    # Root directory for FASTVIDEO configuration files
    # Defaults to `~/.config/fastvideo` unless `XDG_CONFIG_HOME` is set
    # Note that this not only affects how fastvideo finds its configuration files
    # during runtime, but also affects how fastvideo installs its configuration
    # files during **installation**.
    "FASTVIDEO_CONFIG_ROOT":
    lambda: os.path.expanduser(
        os.getenv(
            "FASTVIDEO_CONFIG_ROOT",
            os.path.join(get_default_config_root(), "fastvideo"),
        )),

    # ================== Runtime Env Vars ==================

    # Root directory for FASTVIDEO cache files
    # Defaults to `~/.cache/fastvideo` unless `XDG_CACHE_HOME` is set
    "FASTVIDEO_CACHE_ROOT":
    lambda: os.path.expanduser(os.getenv(
        "FASTVIDEO_CACHE_ROOT",
        os.path.join(get_default_cache_root(), "fastvideo"),
    )),

    # used in distributed environment to determine the ip address
    # of the current node, when the node has multiple network interfaces.
    # If you are using multi-node inference, you should set this differently
    # on each node.
    "FASTVIDEO_HOST_IP":
    lambda: os.getenv("FASTVIDEO_HOST_IP", ""),

    # Used to force set up loopback IP
    "FASTVIDEO_LOOPBACK_IP":
    lambda: os.getenv("FASTVIDEO_LOOPBACK_IP", ""),

    # Number of GPUs per worker in Ray, if it is set to be a fraction,
    # it allows ray to schedule multiple actors on a single GPU,
    # so that users can colocate other actors on the same GPUs as FastVideo.
    "FASTVIDEO_RAY_PER_WORKER_GPUS":
    lambda: float(os.getenv("FASTVIDEO_RAY_PER_WORKER_GPUS", "1.0")),

    # Interval in seconds to log a warning message when the ring buffer is full
    "FASTVIDEO_RINGBUFFER_WARNING_INTERVAL":
    lambda: int(os.environ.get("FASTVIDEO_RINGBUFFER_WARNING_INTERVAL", "60")),

    # Path to the NCCL library file. It is needed because nccl>=2.19 brought
    # by PyTorch contains a bug: https://github.com/NVIDIA/nccl/issues/1234
    "FASTVIDEO_NCCL_SO_PATH":
    lambda: os.environ.get("FASTVIDEO_NCCL_SO_PATH", None),

    # when `FASTVIDEO_NCCL_SO_PATH` is not set, fastvideo will try to find the nccl
    # library file in the locations specified by `LD_LIBRARY_PATH`
    "LD_LIBRARY_PATH":
    lambda: os.environ.get("LD_LIBRARY_PATH", None),

    # Internal flag to enable Dynamo fullgraph capture
    "FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE":
    lambda: bool(os.environ.get("FASTVIDEO_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),

    # local rank of the process in the distributed setting, used to determine
    # the GPU device id
    "LOCAL_RANK":
    lambda: int(os.environ.get("LOCAL_RANK", "0")),

    # used to control the visible devices in the distributed setting
    "CUDA_VISIBLE_DEVICES":
    lambda: os.environ.get("CUDA_VISIBLE_DEVICES", None),

    # timeout for each iteration in the engine
    "FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S":
    lambda: int(os.environ.get("FASTVIDEO_ENGINE_ITERATION_TIMEOUT_S", "60")),

    # Logging configuration
    # If set to 0, fastvideo will not configure logging
    # If set to 1, fastvideo will configure logging using the default configuration
    #    or the configuration file specified by FASTVIDEO_LOGGING_CONFIG_PATH
    "FASTVIDEO_CONFIGURE_LOGGING":
    lambda: int(os.getenv("FASTVIDEO_CONFIGURE_LOGGING", "1")),
    "FASTVIDEO_LOGGING_CONFIG_PATH":
    lambda: os.getenv("FASTVIDEO_LOGGING_CONFIG_PATH"),

    # this is used for configuring the default logging level
    "FASTVIDEO_LOGGING_LEVEL":
    lambda: os.getenv("FASTVIDEO_LOGGING_LEVEL", "INFO"),

    # if set, FASTVIDEO_LOGGING_PREFIX will be prepended to all log messages
    "FASTVIDEO_LOGGING_PREFIX":
    lambda: os.getenv("FASTVIDEO_LOGGING_PREFIX", ""),

    # Trace function calls
    # If set to 1, fastvideo will trace function calls
    # Useful for debugging
    "FASTVIDEO_TRACE_FUNCTION":
    lambda: int(os.getenv("FASTVIDEO_TRACE_FUNCTION", "0")),

    # Backend for attention computation
    # Available options:
    # - "TORCH_SDPA": use torch.nn.MultiheadAttention
    # - "FLASH_ATTN": use FlashAttention
    # - "VIDEO_SPARSE_ATTN": use Video Sparse Attention
    # - "SAGE_ATTN": use Sage Attention
    # - "SAGE_ATTN_THREE": use Sage Attention 3
    # - "ATTN_QAT_INFER": use the in-repo attn_qat_infer inference backend
    # - "ATTN_QAT_TRAIN": use the FastVideoKernel Triton attn_qat_train backend
    # - "SPARSE_FP4_COMPRESS_ATTN": FP4 sparse branch + VSA compress branch
    "FASTVIDEO_ATTENTION_BACKEND":
    lambda: os.getenv("FASTVIDEO_ATTENTION_BACKEND", None),

    # Use dedicated multiprocess context for workers.
    "FASTVIDEO_WORKER_MULTIPROC_METHOD":
    lambda: os.getenv("FASTVIDEO_WORKER_MULTIPROC_METHOD", "spawn"),

    # Enables torch profiler if set. Path to the directory where torch profiler
    # traces are saved. Note that it must be an absolute path.
    "FASTVIDEO_TORCH_PROFILER_DIR":
    lambda: (None if os.getenv("FASTVIDEO_TORCH_PROFILER_DIR", None) is None else os.path.expanduser(
        os.getenv("FASTVIDEO_TORCH_PROFILER_DIR", "."))),

    # Enable torch profiler to record shapes if set
    # FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES=1. If not set, torch profiler will
    # not record shapes.
    "FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES":
    lambda: bool(os.getenv("FASTVIDEO_TORCH_PROFILER_RECORD_SHAPES", "0") != "0"),

    # Enable torch profiler to profile memory if set
    # FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY=1. If not set, torch profiler
    # will not profile memory.
    "FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY":
    lambda: bool(os.getenv("FASTVIDEO_TORCH_PROFILER_WITH_PROFILE_MEMORY", "0") != "0"),

    # Enable torch profiler to profile stack if set
    # FASTVIDEO_TORCH_PROFILER_WITH_STACK=1. If not set, torch profiler WILL
    # profile stack by default.
    "FASTVIDEO_TORCH_PROFILER_WITH_STACK":
    lambda: bool(os.getenv("FASTVIDEO_TORCH_PROFILER_WITH_STACK", "1") != "0"),

    # Enable torch profiler to profile flops if set
    # FASTVIDEO_TORCH_PROFILER_WITH_FLOPS=1. If not set, torch profiler will
    # not profile flops.
    "FASTVIDEO_TORCH_PROFILER_WITH_FLOPS":
    lambda: bool(os.getenv("FASTVIDEO_TORCH_PROFILER_WITH_FLOPS", "0") != "0"),
    # Wait steps per profiling cycle (torch.profiler.schedule wait parameter)
    # Defaults to 2 if not set.
    "FASTVIDEO_TORCH_PROFILER_WAIT_STEPS":
    lambda: int(os.getenv("FASTVIDEO_TORCH_PROFILER_WAIT_STEPS", "2")),
    # Warmup steps per profiling cycle (torch.profiler.schedule warmup parameter)
    # Defaults to 1 if not set.
    "FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS":
    lambda: int(os.getenv("FASTVIDEO_TORCH_PROFILER_WARMUP_STEPS", "1")),
    # Active steps per profiling cycle (torch.profiler.schedule active parameter)
    # Defaults to 2 if not set.
    "FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS":
    lambda: int(os.getenv("FASTVIDEO_TORCH_PROFILER_ACTIVE_STEPS", "2")),
    "FASTVIDEO_TORCH_PROFILE_REGIONS":
    lambda: os.getenv("FASTVIDEO_TORCH_PROFILE_REGIONS", ""),

    # If set, fastvideo will run in development mode, which will enable
    # some additional endpoints for developing and debugging,
    # e.g. `/reset_prefix_cache`
    "FASTVIDEO_SERVER_DEV_MODE":
    lambda: bool(int(os.getenv("FASTVIDEO_SERVER_DEV_MODE", "0"))),

    # If set, fastvideo will enable stage logging, which will print the time
    # taken for each stage
    "FASTVIDEO_STAGE_LOGGING":
    lambda: bool(int(os.getenv("FASTVIDEO_STAGE_LOGGING", "0"))),
}

# end-env-vars-definition


def __getattr__(name: str):
    # lazy evaluation of environment variables
    if name in environment_variables:
        return environment_variables[name]()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


def __dir__():
    return list(environment_variables.keys())