Buckets:
# Fail fast: abort on any command error, on expansion of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Resolve the directory two levels above this script and run from there, so
# the relative paths used later in the script resolve against it.
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
cd "$PROJECT_ROOT"

# Rendezvous and topology settings. Each one is only a default: a value that
# is already present (and non-empty) in the environment wins.
export MASTER_ADDR="${MASTER_ADDR:-localhost}"
export MASTER_PORT="${MASTER_PORT:-6009}"
export NNODES="${NNODES:-1}"
export NODE_RANK="${NODE_RANK:-0}"
export GPUS_PER_NODE="${GPUS_PER_NODE:-1}"

# WORLD_SIZE is derived with shell arithmetic, which would otherwise evaluate
# whatever text these variables contain. Require plain positive integers so a
# bad value yields a clear message instead of an arithmetic error.
for topology_var in NNODES GPUS_PER_NODE; do
  if [[ ! "${!topology_var}" =~ ^[1-9][0-9]*$ ]]; then
    printf 'error: %s must be a positive integer, got "%s"\n' \
      "$topology_var" "${!topology_var}" >&2
    exit 1
  fi
done
unset topology_var
export WORLD_SIZE="$((GPUS_PER_NODE * NNODES))"

# Allocator / NCCL tuning defaults; also overridable from the environment.
export PYTORCH_CUDA_ALLOC_CONF="${PYTORCH_CUDA_ALLOC_CONF:-expandable_segments:True}"
export NCCL_ALGO="${NCCL_ALGO:-^NVLS}"

# Put the project root first on PYTHONPATH. The ':' separator is added only
# when PYTHONPATH already has content, so an unset PYTHONPATH does not leave a
# trailing empty entry behind.
export PYTHONPATH="${PROJECT_ROOT}${PYTHONPATH:+:${PYTHONPATH}}"
# Launcher options for torchrun. Held in an array so each option is passed as
# exactly one argument, with no dependence on unquoted word splitting.
DISTRIBUTED_ARGS=(
  "--nnodes=${NNODES}"
  "--node_rank=${NODE_RANK}"
  "--nproc_per_node=${GPUS_PER_NODE}"
  "--rdzv-backend=c10d"
  "--rdzv-endpoint=${MASTER_ADDR}:${MASTER_PORT}"
)

# ==============================================================================================
# RUNNING ON CONSUMER GPUs (e.g., RTX 5090)
# ==============================================================================================
# If you want to run this script on a consumer GPU, please follow these steps to avoid OOM errors:
#
# 1. Define MAGI_COMPILER_OFFLOAD_ARGS and append it to the `torchrun` command below.
# 2. Update `engine_config.cp_size` in `config.json` to exactly match the number of GPUs on your machine.
# 3. Depending on your NUMA node configuration, use `numactl` as a prefix to optimize memory bandwidth:
#    - If spanning multiple NUMA nodes: `numactl --interleave=all`
#    - If on a single NUMA node: `numactl --cpunodebind=$NUMA_NODE --membind=$NUMA_NODE`
#
# --- Example Usage ---
# MAGI_COMPILER_OFFLOAD_ARGS=(--offload_config.model_cpu_offload --offload_config.gpu_resident_weight_ratio 0.35 --offload_config.offload_policy HEURISTIC)
# numactl --interleave=all torchrun "${DISTRIBUTED_ARGS[@]}" inference/pipeline/entry.py ... "${MAGI_COMPILER_OFFLOAD_ARGS[@]}"
# ==============================================================================================

# Take the timestamp once so the output directory and the log file of a single
# run always carry the same suffix.
run_timestamp="$(date '+%Y%m%d_%H%M%S')"

# Read the prompt up front. A failed command substitution inside an argument
# list does not stop the script, so an unreadable file is rejected explicitly
# rather than silently launching with an empty prompt.
prompt_file="example/assets/prompt.txt"
if [[ ! -r "$prompt_file" ]]; then
  printf 'error: prompt file is missing or not readable: %s\n' "$prompt_file" >&2
  exit 1
fi
prompt_text="$(<"$prompt_file")"

# Run the example pipeline; stdout and stderr are merged, shown on the
# terminal, and copied into a per-run log file by tee.
torchrun "${DISTRIBUTED_ARGS[@]}" inference/pipeline/entry.py \
  --config-load-path example/base/config.json \
  --prompt "$prompt_text" \
  --image_path example/assets/image.png \
  --seconds 4 \
  --br_width 448 \
  --br_height 256 \
  --output_path "output_example_base_${run_timestamp}" \
  2>&1 | tee "log_example_base_${run_timestamp}.log"
Xet Storage Details
- Size:
- 2.28 kB
- Xet hash:
- c18e0e2afa5a2c928e54b8c965c0321f81044c74033201ca1bc3ca85f440dd5c
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.