Yuekai Zhang committed on
Commit ·
547de86
1
Parent(s): 1b118c4
add essentials
Browse files
- README.md +8 -0
- encoder.onnx +3 -0
- requirements.txt +3 -0
- run.sh +28 -0
README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### Reproduce
|
| 2 |
+
|
| 3 |
+
```sh
|
| 4 |
+
|
| 5 |
+
docker run -it --name "oppo" --gpus all --net host nvcr.io/nvidia/tensorrt:23.03-py3
|
| 6 |
+
pip3 install -r requirements.txt
|
| 7 |
+
bash run.sh
|
| 8 |
+
```
|
encoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1096f60e4c2f92de7c24892a41fe76af7a391ff64d0b89b0a3c8d2e6d12e1311
|
| 3 |
+
size 72431039
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tensorrt==8.6.0
|
| 2 |
+
onnx
|
| 3 |
+
onnxruntime==1.13.1
|
run.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
polygraphy run \
|
| 2 |
+
./encoder.onnx \
|
| 3 |
+
--fp16 \
|
| 4 |
+
--onnxrt \
|
| 5 |
+
--trt \
|
| 6 |
+
--atol 1e-3 --rtol 1e-3 \
|
| 7 |
+
--pool-limit workspace:1000000000 \
|
| 8 |
+
--save-engine=./encoder1_fp16.plan \
|
| 9 |
+
--verbose \
|
| 10 |
+
--onnx-outputs mark all \
|
| 11 |
+
--trt-outputs mark all \
|
| 12 |
+
--trt-min-shapes \
|
| 13 |
+
chunk_xs:[1,67,80] chunk_lens:[1] \
|
| 14 |
+
offset:[1,1] att_cache:[1,12,4,80,128] \
|
| 15 |
+
cnn_cache:[1,12,256,7] cache_mask:[1,1,80] \
|
| 16 |
+
--trt-opt-shapes \
|
| 17 |
+
chunk_xs:[16,67,80] chunk_lens:[16] \
|
| 18 |
+
offset:[16,1] att_cache:[16,12,4,80,128] \
|
| 19 |
+
cnn_cache:[16,12,256,7] cache_mask:[16,1,80] \
|
| 20 |
+
--trt-max-shapes \
|
| 21 |
+
chunk_xs:[32,67,80] chunk_lens:[32] \
|
| 22 |
+
offset:[32,1] att_cache:[32,12,4,80,128] \
|
| 23 |
+
cnn_cache:[32,12,256,7] cache_mask:[32,1,80] \
|
| 24 |
+
--input-shapes \
|
| 25 |
+
chunk_xs:[16,67,80] chunk_lens:[16] \
|
| 26 |
+
offset:[16,1] att_cache:[16,12,4,80,128] \
|
| 27 |
+
cnn_cache:[16,12,256,7] cache_mask:[16,1,80] \
|
| 28 |
+
--validate
|