update README
Files changed: README.md (+3, -3), README_CN.md (+3, -3)
README.md

```diff
@@ -228,11 +228,11 @@ OUTPUT_PATH=./outputs/output.mp4
 # Configuration
 N_INFERENCE_GPU=8              # Parallel inference GPU count
 CFG_DISTILLED=true             # Inference with CFG-distilled model, 2x speedup
-SPARSE_ATTN=false              # Inference with sparse attention
+SPARSE_ATTN=false              # Inference with sparse attention (only 720p models are equipped with sparse attention). Please ensure flex-block-attn is installed
 SAGE_ATTN=false                # Inference with SageAttention
-
-REWRITE=true                   # Enable prompt rewriting
+REWRITE=true                   # Enable prompt rewriting. Please ensure the rewrite vLLM server is deployed and configured.
 OVERLAP_GROUP_OFFLOADING=true  # Only valid when group offloading is enabled; significantly increases CPU memory usage but speeds up inference
+MODEL_PATH=ckpts               # Path to pretrained model
 
 torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
   --prompt "$PROMPT" \
```
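For context, here is how the updated configuration block reads once this commit is applied, assembled into a minimal launch-script sketch. Only the lines shown in the diff above are confirmed by the commit; the PROMPT value and anything after --prompt are illustrative assumptions reconstructed from the hunk context.

```bash
#!/usr/bin/env bash
# Sketch of the launch script after this commit. PROMPT's value and any flags
# beyond --prompt are assumptions; OUTPUT_PATH comes from the hunk header.
PROMPT="A sample prompt"                  # assumed; defined earlier in the README
OUTPUT_PATH=./outputs/output.mp4          # from the hunk header context

# Configuration
N_INFERENCE_GPU=8              # Parallel inference GPU count
CFG_DISTILLED=true             # CFG-distilled model, 2x speedup
SPARSE_ATTN=false              # Sparse attention: 720p models only, needs flex-block-attn
SAGE_ATTN=false                # SageAttention
REWRITE=true                   # Prompt rewriting: needs the rewrite vLLM server
OVERLAP_GROUP_OFFLOADING=true  # Only with group offloading; more CPU RAM, faster inference
MODEL_PATH=ckpts               # Path to pretrained model

torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
  --prompt "$PROMPT"
```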
README_CN.md (the same three changes, applied to the Chinese README)

```diff
@@ -215,11 +215,11 @@ OUTPUT_PATH=./outputs/output.mp4
 # Configuration
 N_INFERENCE_GPU=8              # Number of GPUs for parallel inference
 CFG_DISTILLED=true             # Inference with the CFG-distilled model, 2x speedup
-SPARSE_ATTN=false              #
+SPARSE_ATTN=false              # Inference with sparse attention (only 720p models are equipped with sparse attention). Please ensure flex-block-attn is installed
 SAGE_ATTN=false                # Inference with SageAttention
-
-REWRITE=true                   # Enable prompt rewriting
+REWRITE=true                   # Enable prompt rewriting. Please ensure the rewrite vLLM server is deployed and configured.
 OVERLAP_GROUP_OFFLOADING=true  # Only valid when group offloading is enabled; significantly increases CPU memory usage but speeds up inference
+MODEL_PATH=ckpts               # Path to the pretrained model
 
 torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
   --prompt "$PROMPT" \
```
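The two new comments add external prerequisites (flex-block-attn for SPARSE_ATTN, a deployed vLLM server for REWRITE). A pre-flight check along the following lines can verify them before launching; note that the import name flex_block_attn, the default server URL, and the /health route are assumptions for illustration, not details specified by this commit.

```bash
#!/usr/bin/env bash
# Hypothetical pre-flight checks for the prerequisites named in the new
# comments; the module name and server URL/route below are assumptions.
if ! python -c "import flex_block_attn" 2>/dev/null; then
  echo "flex-block-attn not importable; leave SPARSE_ATTN=false" >&2
fi

REWRITE_SERVER_URL="${REWRITE_SERVER_URL:-http://localhost:8000}"  # assumed default
if ! curl -sf "$REWRITE_SERVER_URL/health" >/dev/null; then
  echo "rewrite vLLM server unreachable at $REWRITE_SERVER_URL; leave REWRITE=false" >&2
fi
```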