#!/usr/bin/env bash
#
# Builds the TensorRT artifacts for FireRedASR-AED-L:
#   1. runs export_encoder_tensorrt.py on the FP16 encoder ONNX file and asks
#      it to write an engine named encoder.plan;
#   2. runs trtllm-build on the decoder checkpoint to produce the decoder
#      engine.
#
# All paths are relative to the current working directory; PATH and
# PYTHONPATH are extended from $PWD, so invoke the script from the directory
# that contains fireredasr/ and export_encoder_tensorrt.py.

# Abort on the first failing step so the decoder build never runs after a
# failed encoder export, and treat unset variables as errors.
set -euo pipefail

export PATH="${PWD}/fireredasr/:${PWD}/fireredasr/utils/:${PATH}"
# ${PYTHONPATH:+...} appends the previous value only when it is non-empty,
# which avoids a dangling ':' and keeps `set -u` from tripping when
# PYTHONPATH is unset.
export PYTHONPATH="${PWD}/${PYTHONPATH:+:${PYTHONPATH}}"

# Root directory for every artifact read or written below.
TRT_ENGINE_OUTPUT_DIR=./FireRedASR-AED-L-TensorRT

# Build settings forwarded to trtllm-build.
INFERENCE_PRECISION=float16
MAX_BEAM_WIDTH=4
MAX_BATCH_SIZE=64

# Derive both directories from INFERENCE_PRECISION so that changing the
# precision cannot leave the checkpoint path pointing at a stale directory.
checkpoint_dir="${TRT_ENGINE_OUTPUT_DIR}/tllm_checkpoint_${INFERENCE_PRECISION}"
output_dir="${TRT_ENGINE_OUTPUT_DIR}/trt_engine_${INFERENCE_PRECISION}"

# --- Encoder ----------------------------------------------------------------
# Alternative invocation (disabled): pass the pretrained model directory
# instead of an existing ONNX file.
# model_path=pretrained_models/FireRedASR-AED-L
# python3 export_encoder_tensorrt.py \
#   --model-dir "${model_path}" \
#   --tensorrt-model-dir "${TRT_ENGINE_OUTPUT_DIR}" \
#   --trt-engine-file-name encoder.plan

python3 export_encoder_tensorrt.py \
  --onnx-model-path "${TRT_ENGINE_OUTPUT_DIR}/encoder.fp16.onnx" \
  --tensorrt-model-dir "${TRT_ENGINE_OUTPUT_DIR}" \
  --trt-engine-file-name encoder.plan

# --- Decoder ----------------------------------------------------------------
# Checkpoint conversion (disabled): presumably this is what populates
# ${checkpoint_dir}; enable it when the checkpoint does not exist yet.
# model_path=pretrained_models/FireRedASR-AED-L/model.pth.tar
# python3 convert_checkpoint.py \
#   --dtype "${INFERENCE_PRECISION}" \
#   --model_path "${model_path}" \
#   --output_dir "${checkpoint_dir}"

trtllm-build --checkpoint_dir "${checkpoint_dir}/decoder" \
  --output_dir "${output_dir}/decoder" \
  --moe_plugin disable \
  --max_beam_width "${MAX_BEAM_WIDTH}" \
  --max_batch_size "${MAX_BATCH_SIZE}" \
  --max_seq_len 512 \
  --max_input_len 4 \
  --max_encoder_input_len 1024 \
  --gemm_plugin "${INFERENCE_PRECISION}" \
  --remove_input_padding disable \
  --paged_kv_cache disable \
  --gpt_attention_plugin "${INFERENCE_PRECISION}"

# Expected layout once every step has completed:
#
# FireRedASR-AED-L-TensorRT/
# ├── encoder.fp16.onnx
# ├── encoder.plan
# ├── tllm_checkpoint_float16
# │   └── decoder
# │       ├── config.json
# │       └── rank0.safetensors
# └── trt_engine_float16
#     └── decoder
#         ├── config.json
#         └── rank0.engine