2.28 kB

	#!/usr/bin/env bash
	#
	# Prepares the environment for a torchrun launch: resolves the project root,
	# fills in defaults for the distributed-topology variables, and assembles the
	# torchrun flag string in DISTRIBUTED_ARGS.
	#
	# Overridable environment variables (defaults in parentheses):
	#   MASTER_ADDR (localhost), MASTER_PORT (6009), NNODES (1), NODE_RANK (0),
	#   GPUS_PER_NODE (1), PYTORCH_CUDA_ALLOC_CONF, NCCL_ALGO, PYTHONPATH.

	set -euo pipefail

	# The project root is two directories above this script; all relative paths
	# used afterwards are resolved from there.
	PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
	cd "$PROJECT_ROOT"

	export MASTER_ADDR="${MASTER_ADDR:-localhost}"
	export MASTER_PORT="${MASTER_PORT:-6009}"
	export NNODES="${NNODES:-1}"
	export NODE_RANK="${NODE_RANK:-0}"
	export GPUS_PER_NODE="${GPUS_PER_NODE:-1}"

	# NNODES and GPUS_PER_NODE feed an arithmetic expansion below. Reject anything
	# that is not a plain non-negative integer so a bad value produces a clear
	# message instead of an obscure arithmetic / unbound-variable error.
	for topology_var in NNODES GPUS_PER_NODE; do
		if [[ ! "${!topology_var}" =~ ^[0-9]+$ ]]; then
			printf 'error: %s must be a non-negative integer, got "%s"\n' \
				"$topology_var" "${!topology_var}" >&2
			exit 1
		fi
	done
	unset topology_var

	# Total process count across all nodes.
	export WORLD_SIZE="$((GPUS_PER_NODE * NNODES))"

	# Defaults are applied only when the caller has not already set these.
	export PYTORCH_CUDA_ALLOC_CONF="${PYTORCH_CUDA_ALLOC_CONF:-expandable_segments:True}"
	export NCCL_ALGO="${NCCL_ALGO:-^NVLS}"

	# Prepend the project root. The separator is emitted only when PYTHONPATH
	# already has content, so no trailing ':' (an empty path entry) is produced.
	export PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}"

	# Space-separated torchrun flags; kept as a plain string because the launch
	# command expands it by word splitting.
	DISTRIBUTED_ARGS="--nnodes=${NNODES} --node_rank=${NODE_RANK} --nproc_per_node=${GPUS_PER_NODE} --rdzv-backend=c10d --rdzv-endpoint=${MASTER_ADDR}:${MASTER_PORT}"

	# ==============================================================================================
	# RUNNING ON CONSUMER GPUs (e.g., RTX 5090)
	# ==============================================================================================
	# If you want to run this script on a consumer GPU, please follow these steps to avoid OOM errors:
	#
	# 1. Define MAGI_COMPILER_OFFLOAD_ARGS and append it to the `torchrun` command below.
	# 2. Update `engine_config.cp_size` in `config.json` to exactly match the number of GPUs on your machine.
	# 3. Depending on your NUMA node configuration, use `numactl` as a prefix to optimize memory bandwidth:
	#    - If spanning multiple NUMA nodes: `numactl --interleave=all`
	#    - If on a single NUMA node: `numactl --cpunodebind=$NUMA_NODE --membind=$NUMA_NODE`
	#
	# --- Example Usage ---
	# MAGI_COMPILER_OFFLOAD_ARGS="--offload_config.model_cpu_offload --offload_config.gpu_resident_weight_ratio 0.35 --offload_config.offload_policy HEURISTIC"
	# numactl --interleave=all torchrun ${DISTRIBUTED_ARGS} inference/pipeline/entry.py ... $MAGI_COMPILER_OFFLOAD_ARGS
	# ==============================================================================================

	# Launch the inference entry point through torchrun and mirror its combined
	# stdout/stderr to both the terminal and a log file.

	# Take the timestamp once so the output directory and the log file always
	# share the same suffix (two separate `date` calls can straddle a second).
	run_stamp="$(date '+%Y%m%d_%H%M%S')"

	# DISTRIBUTED_ARGS is a space-separated flag string. Split it into an array
	# with `read -a` so the words are separated without pathname expansion
	# (an unquoted ${DISTRIBUTED_ARGS} would also glob, e.g. on a bracketed
	# IPv6 MASTER_ADDR).
	read -r -a distributed_args <<<"${DISTRIBUTED_ARGS}"

	# The pipe must be a real (unescaped) `|`: with `\|` the shell hands "|",
	# "tee" and the log name to torchrun as arguments and nothing is logged.
	torchrun "${distributed_args[@]}" inference/pipeline/entry.py \
		--config-load-path example/base/config.json \
		--prompt "$(<example/assets/prompt.txt)" \
		--image_path example/assets/image.png \
		--seconds 4 \
		--br_width 448 \
		--br_height 256 \
		--output_path "output_example_base_${run_stamp}" \
		2>&1 | tee "log_example_base_${run_stamp}.log"

Xet Storage Details

Size: 2.28 kB
Xet hash: c18e0e2afa5a2c928e54b8c965c0321f81044c74033201ca1bc3ca85f440dd5c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.