# Build an FP16 TensorRT engine from ./encoder.onnx with Polygraphy, run the
# model with both the ONNX Runtime (--onnxrt) and TensorRT (--trt) runners, and
# compare their outputs using absolute/relative tolerances of 1e-3.
# The built engine is written to ./encoder1_fp16.plan.
#
# Shapes: for every input only the leading dimension differs between the
# profiles (min 1, opt 16, max 32); --input-shapes uses the same values as the
# opt profile. NOTE(review): the leading dimension is presumably the batch
# size -- confirm against the model.
#
# Quoting notes:
#   * Each line must end with a backslash IMMEDIATELY followed by a newline.
#     A backslash followed by a space is an escaped literal space and would
#     glue a leading blank onto the next argument.
#   * Shape specs are single-quoted so the shell never treats `[...]` as a
#     glob bracket expression (which could expand to a matching filename or
#     fail under failglob/nullglob or in zsh).
#   * `mark all` is intentionally passed as two separate words, exactly as in
#     the original command.
#   * No comments may be placed between the continued lines below; a comment
#     line there would terminate the command.
polygraphy run \
  ./encoder.onnx \
  --fp16 \
  --onnxrt \
  --trt \
  --atol 1e-3 --rtol 1e-3 \
  --pool-limit workspace:1000000000 \
  --save-engine=./encoder1_fp16.plan \
  --verbose \
  --onnx-outputs mark all \
  --trt-outputs mark all \
  --trt-min-shapes \
    'chunk_xs:[1,67,80]' 'chunk_lens:[1]' \
    'offset:[1,1]' 'att_cache:[1,12,4,80,128]' \
    'cnn_cache:[1,12,256,7]' 'cache_mask:[1,1,80]' \
  --trt-opt-shapes \
    'chunk_xs:[16,67,80]' 'chunk_lens:[16]' \
    'offset:[16,1]' 'att_cache:[16,12,4,80,128]' \
    'cnn_cache:[16,12,256,7]' 'cache_mask:[16,1,80]' \
  --trt-max-shapes \
    'chunk_xs:[32,67,80]' 'chunk_lens:[32]' \
    'offset:[32,1]' 'att_cache:[32,12,4,80,128]' \
    'cnn_cache:[32,12,256,7]' 'cache_mask:[32,1,80]' \
  --input-shapes \
    'chunk_xs:[16,67,80]' 'chunk_lens:[16]' \
    'offset:[16,1]' 'att_cache:[16,12,4,80,128]' \
    'cnn_cache:[16,12,256,7]' 'cache_mask:[16,1,80]' \
  --validate