KevinNg99 committed
Commit 3a7bd2c · 1 Parent(s): f75dc22

update README

Files changed (2)
  1. README.md +3 -3
  2. README_CN.md +3 -3
README.md CHANGED
@@ -228,11 +228,11 @@ OUTPUT_PATH=./outputs/output.mp4
 # Configuration
 N_INFERENCE_GPU=8             # Parallel inference GPU count
 CFG_DISTILLED=true            # Inference with CFG-distilled model, 2x speedup
-SPARSE_ATTN=false             # Inference with sparse attention
+SPARSE_ATTN=false             # Inference with sparse attention (only the 720p models are equipped with sparse attention); please ensure flex-block-attn is installed
 SAGE_ATTN=false               # Inference with SageAttention
-MODEL_PATH=ckpts              # Path to pretrained model
-REWRITE=true                  # Enable prompt rewriting
+REWRITE=true                  # Enable prompt rewriting; please ensure the rewrite vLLM server is deployed and configured
 OVERLAP_GROUP_OFFLOADING=true # Only valid when group offloading is enabled; significantly increases CPU memory usage but speeds up inference
+MODEL_PATH=ckpts              # Path to pretrained model

 torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
     --prompt "$PROMPT" \
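
The two new comments imply setup steps the diff itself doesn't show. Below is a minimal pre-flight sketch, assuming flex-block-attn installs an importable flex_block_attn module and that prompt rewriting talks to an OpenAI-compatible vLLM endpoint; both are assumptions, not confirmed by this commit, and <rewrite-model> is a placeholder rather than the repo's documented choice.

# Pre-flight sketch before enabling the new flags (names above are assumptions).

# SPARSE_ATTN=true needs flex-block-attn; assume it imports as `flex_block_attn`.
python -c "import flex_block_attn" 2>/dev/null \
  || { echo "flex-block-attn not installed; keep SPARSE_ATTN=false"; exit 1; }

# REWRITE=true needs a running rewrite server; assume any OpenAI-compatible
# vLLM endpoint works. Replace <rewrite-model> with your rewrite model.
vllm serve <rewrite-model> --port 8000 &

# Block until the server answers before launching torchrun.
until curl -sf http://localhost:8000/v1/models > /dev/null; do sleep 2; done

Only once both checks pass is it safe to set SPARSE_ATTN=true or leave REWRITE=true in the configuration block above.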
README_CN.md CHANGED
@@ -215,11 +215,11 @@ OUTPUT_PATH=./outputs/output.mp4
 # Configuration
 N_INFERENCE_GPU=8             # Number of GPUs for parallel inference
 CFG_DISTILLED=true            # Inference with CFG-distilled model, 2x speedup
-SPARSE_ATTN=false             # Inference with sparse attention
+SPARSE_ATTN=false             # Inference with sparse attention (only the 720p models are equipped with sparse attention); please ensure flex-block-attn is installed
 SAGE_ATTN=false               # Inference with SageAttention
-MODEL_PATH=ckpts              # Path to the pretrained model
-REWRITE=true                  # Enable prompt rewriting
+REWRITE=true                  # Enable prompt rewriting; please ensure the rewrite vLLM server is deployed and configured
 OVERLAP_GROUP_OFFLOADING=true # Only valid when group offloading is enabled; significantly increases CPU memory usage but speeds up inference
+MODEL_PATH=ckpts              # Path to the pretrained model

 torchrun --nproc_per_node=$N_INFERENCE_GPU generate.py \
     --prompt "$PROMPT" \