Commit ·
e53235c
0
Parent(s):
Super-squash branch 'main' using huggingface_hub
Browse files- .gitattributes +79 -0
- .gitignore +25 -0
- 2dfan4.onnx +3 -0
- README.md +246 -0
- bisenet_resnet_18.onnx +3 -0
- bisenet_resnet_34.onnx +3 -0
- buffalo_l/1k3d68.onnx +3 -0
- buffalo_l/2d106det.onnx +3 -0
- buffalo_l/det_10g.onnx +3 -0
- buffalo_l/det_10g_fp16.onnx +3 -0
- buffalo_l/genderage.onnx +3 -0
- buffalo_l/w600k_r50.onnx +3 -0
- dfl_xseg.onnx +3 -0
- gfpgan/GFPGANv1.4.pth +3 -0
- gfpgan/weights/detection_Resnet50_Final.pth +3 -0
- gfpgan/weights/parsing_parsenet.pth +3 -0
- hyperswap_1a_256.onnx +3 -0
- hyperswap_1b_256.onnx +3 -0
- hyperswap_1c_256.onnx +3 -0
- inswapper_128.onnx +3 -0
- inswapper_128_fp16.onnx +3 -0
- runtime_kit/cuda-12.9-py312/develop/manifest.json +13 -0
- runtime_kit/cuda-12.9-py312/develop/models.tar.zst +3 -0
- runtime_kit/cuda-12.9-py312/develop/runtime_kit.tar.zst +3 -0
- scripts/convert_scrfd_fp16.py +76 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_fp16_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_fp16_sm89.profile +0 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_8113506114015737638_0_0_fp16_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_fp16_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_fp16_sm89.profile +0 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_fp16_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_fp16_sm89.profile +0 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_167800414563226528_0_0_fp16_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_fp16_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_fp16_sm89.profile +0 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_fp16_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_fp16_sm89.profile +0 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_10294570408813474267_0_0_sm89.engine +3 -0
- trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_cache_sm89.timing +3 -0
- trt_cache/sm89/trt10.9_ort1.24/manifest.json +24 -0
- xseg_1.onnx +3 -0
- xseg_2.onnx +3 -0
- xseg_3.onnx +3 -0
- yolov8n.onnx +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_4040390242348433159_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_10294570408813474267_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_4378169159435062660_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_11561260756091584865_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_13398278383551544223_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_167800414563226528_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_14612954359127931697_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_8113506114015737638_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_7756492850973980650_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_9817536908852838938_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_2943710519155863468_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_18208867741518846433_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_3270097384199296458_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_3022756569470766001_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_17194444465592813910_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_1803974360890584330_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_15578803181408430984_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_17990913744339786239_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_10057366433567039755_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_10825663542842851070_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_13448473176397120007_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_13522256330928318305_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_13447028154422237108_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_7950080726743702987_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_13100297186191859674_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_cache_sm89.timing filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_943447651177039798_0_0_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_10294570408813474267_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_167800414563226528_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_943447651177039798_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_4378169159435062660_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_11561260756091584865_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_tf2onnx_13398278383551544223_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_8113506114015737638_0_0_fp16_sm89.engine filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# IDE
|
| 2 |
+
.idea/
|
| 3 |
+
.vscode/
|
| 4 |
+
*.swp
|
| 5 |
+
*.swo
|
| 6 |
+
|
| 7 |
+
# Python
|
| 8 |
+
__pycache__/
|
| 9 |
+
*.py[cod]
|
| 10 |
+
*$py.class
|
| 11 |
+
*.so
|
| 12 |
+
.Python
|
| 13 |
+
.env
|
| 14 |
+
.venv
|
| 15 |
+
venv/
|
| 16 |
+
ENV/
|
| 17 |
+
|
| 18 |
+
# OS
|
| 19 |
+
.DS_Store
|
| 20 |
+
Thumbs.db
|
| 21 |
+
|
| 22 |
+
# Temp files
|
| 23 |
+
*.tmp
|
| 24 |
+
*.temp
|
| 25 |
+
*.log
|
2dfan4.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:678c6fa539d52335a31c980feefdf4a6e02d781d83dce00af8a894f114557285
|
| 3 |
+
size 97904803
|
README.md
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
tags:
|
| 4 |
+
- face-swap
|
| 5 |
+
- face-enhancement
|
| 6 |
+
- face-detection
|
| 7 |
+
- face-parsing
|
| 8 |
+
- face-mask
|
| 9 |
+
- face-segmentation
|
| 10 |
+
- person-detection
|
| 11 |
+
- tensorrt
|
| 12 |
+
- deep-learning
|
| 13 |
+
- computer-vision
|
| 14 |
+
- morphstream
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
# MorphStream Models
|
| 18 |
+
|
| 19 |
+
Models and TensorRT engine cache for real-time face processing, used by the [MorphStream](https://morphstream.ai) GPU Worker.
|
| 20 |
+
|
| 21 |
+
**Private repository** — requires access token for downloads.
|
| 22 |
+
|
| 23 |
+
## Structure
|
| 24 |
+
|
| 25 |
+
```
|
| 26 |
+
/
|
| 27 |
+
├── inswapper_128.onnx # Standard face swap (529MB)
|
| 28 |
+
├── inswapper_128_fp16.onnx # FP16 optimized - default (265MB)
|
| 29 |
+
├── hyperswap_1a_256.onnx # HyperSwap variant A (384MB)
|
| 30 |
+
├── hyperswap_1b_256.onnx # HyperSwap variant B (384MB)
|
| 31 |
+
├── hyperswap_1c_256.onnx # HyperSwap variant C (384MB)
|
| 32 |
+
├── yolov8n.onnx # Person detection (12MB)
|
| 33 |
+
├── dfl_xseg.onnx # XSeg v1 face segmentation — legacy (67MB)
|
| 34 |
+
├── xseg_1.onnx # XSeg occlusion model 1 (67MB)
|
| 35 |
+
├── xseg_2.onnx # XSeg occlusion model 2 (67MB)
|
| 36 |
+
├── xseg_3.onnx # XSeg occlusion model 3 (67MB)
|
| 37 |
+
├── 2dfan4.onnx # 68-point face landmarks (93MB)
|
| 38 |
+
├── bisenet_resnet_34.onnx # BiSeNet face parsing ResNet-34 (89MB)
|
| 39 |
+
├── bisenet_resnet_18.onnx # BiSeNet face parsing ResNet-18 (51MB)
|
| 40 |
+
├── buffalo_l/ # Direct ONNX face analysis models
|
| 41 |
+
│ ├── det_10g.onnx # SCRFD face detection FP32 (16MB)
|
| 42 |
+
│ ├── det_10g_fp16.onnx # SCRFD face detection FP16 (8.1MB)
|
| 43 |
+
│ ├── w600k_r50.onnx # ArcFace recognition embeddings (166MB)
|
| 44 |
+
│ ├── 1k3d68.onnx # 3D landmarks, 68 points (137MB)
|
| 45 |
+
│ ├── 2d106det.onnx # 2D landmarks, 106 points (4.8MB)
|
| 46 |
+
│ └── genderage.onnx # Gender/age estimation (1.3MB)
|
| 47 |
+
├── gfpgan/ # Face enhancement (not used in real-time)
|
| 48 |
+
│ ├── GFPGANv1.4.pth
|
| 49 |
+
│ └── weights/
|
| 50 |
+
│ ├── detection_Resnet50_Final.pth
|
| 51 |
+
│ └── parsing_parsenet.pth
|
| 52 |
+
├── trt_cache/ # Pre-compiled TensorRT engines
|
| 53 |
+
│ ├── sm89/trt10.9_ort1.24/ # RTX 4090
|
| 54 |
+
│ ├── sm86/trt10.9_ort1.24/ # RTX 3090
|
| 55 |
+
│ └── ... # Other GPU arch + version combos
|
| 56 |
+
└── scripts/
|
| 57 |
+
└── convert_scrfd_fp16.py # FP32 → FP16 conversion utility
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
## Face Swap Models
|
| 61 |
+
|
| 62 |
+
| Model | Description | Size | Input | Format |
|
| 63 |
+
|-------|-------------|------|-------|--------|
|
| 64 |
+
| `inswapper_128.onnx` | Standard quality | 529 MB | 128px | ONNX FP32 |
|
| 65 |
+
| `inswapper_128_fp16.onnx` | FP16 optimized (**default**) | 265 MB | 128px | ONNX FP16 |
|
| 66 |
+
| `hyperswap_1a_256.onnx` | High quality — variant A | 384 MB | 256px | ONNX FP32 |
|
| 67 |
+
| `hyperswap_1b_256.onnx` | High quality — variant B | 384 MB | 256px | ONNX FP32 |
|
| 68 |
+
| `hyperswap_1c_256.onnx` | High quality — variant C | 384 MB | 256px | ONNX FP32 |
|
| 69 |
+
|
| 70 |
+
## Face Analysis (buffalo_l)
|
| 71 |
+
|
| 72 |
+
Models originally from [InsightFace](https://github.com/deepinsight/insightface) buffalo_l pack. GPU Worker loads them directly via ONNX Runtime (DirectSCRFD, DirectArcFace, DirectLandmark106) without the InsightFace Python library.
|
| 73 |
+
|
| 74 |
+
| Model | GPU Worker Class | Description | Size |
|
| 75 |
+
|-------|-----------------|-------------|------|
|
| 76 |
+
| `det_10g.onnx` | `DirectSCRFD` | SCRFD face detection (FP32) | 16 MB |
|
| 77 |
+
| `det_10g_fp16.onnx` | `DirectSCRFD` | SCRFD face detection (FP16, ~2x faster on Tensor Cores) | 8.1 MB |
|
| 78 |
+
| `w600k_r50.onnx` | `DirectArcFace` | ArcFace R50 face recognition embeddings | 166 MB |
|
| 79 |
+
| `2d106det.onnx` | `DirectLandmark106` | 2D face landmarks (106 points), CLAHE + face angle rotation. Used in face detection pipeline; 106-pt landmarks serve as fallback for masking when 68-pt unavailable | 4.8 MB |
|
| 80 |
+
| `1k3d68.onnx` | — | 3D face landmarks (68 points) — not used at runtime | 137 MB |
|
| 81 |
+
| `genderage.onnx` | — | Gender and age estimation — not used at runtime | 1.3 MB |
|
| 82 |
+
|
| 83 |
+
## Face Landmarks
|
| 84 |
+
|
| 85 |
+
| Model | Description | Size | Input |
|
| 86 |
+
|-------|-------------|------|-------|
|
| 87 |
+
| `2dfan4.onnx` | 2DFAN4 — 68-point face landmarks | 93 MB | 256px |
|
| 88 |
+
|
| 89 |
+
FaceFusion-style 5/68 refinement: SCRFD detects the face and 5 coarse keypoints, then 2DFAN4 produces 68 precise landmarks, which are converted to 5 alignment points (eye centers computed from 6 points each, exact nose tip, exact mouth corners). This improves face alignment quality for swap models.
|
| 90 |
+
|
| 91 |
+
**Primary landmark model for face masking**: 68-pt landmarks from 2DFAN4 are the preferred source for `custom_paste_back` compositing (hull, cutouts, mouth blend). 106-pt landmarks from `2d106det.onnx` serve as fallback. Dual-landmark support: `has_valid_68` preferred, `has_valid_106` fallback, `use_68` flag propagated through all mask functions. Landmarks are temporally smoothed via One Euro Filter in `LandmarkSmoother` (attribute `face.landmark_2d_68`).
|
| 92 |
+
|
| 93 |
+
Source: [FaceFusion assets](https://github.com/facefusion/facefusion-assets).
|
| 94 |
+
|
| 95 |
+
## Person Detection
|
| 96 |
+
|
| 97 |
+
| Model | Description | Size | Input |
|
| 98 |
+
|-------|-------------|------|-------|
|
| 99 |
+
| `yolov8n.onnx` | YOLOv8n — person detection (COCO class 0) | 12 MB | 640px |
|
| 100 |
+
|
| 101 |
+
Used to distinguish "person left frame" vs "face occluded" during face swap.
|
| 102 |
+
|
| 103 |
+
## Face Mask Models (FaceFusion 4-Mask System)
|
| 104 |
+
|
| 105 |
+
Occlusion detection (XSeg) and semantic face parsing (BiSeNet) models for composable mask pipeline.
|
| 106 |
+
Used in GPU Worker's `face_masker.py` for box/occlusion/area/region masks.
|
| 107 |
+
|
| 108 |
+
Source: [FaceFusion 3.x assets](https://github.com/facefusion/facefusion-assets) (Apache-2.0), mirrored here for reliability.
|
| 109 |
+
|
| 110 |
+
### XSeg — Occlusion Detection
|
| 111 |
+
|
| 112 |
+
| Model | Description | Size | Input | Output |
|
| 113 |
+
|-------|-------------|------|-------|--------|
|
| 114 |
+
| `dfl_xseg.onnx` | XSeg v1 — legacy binary face mask (not used) | 67 MB | 256px | binary (face/bg) |
|
| 115 |
+
| `xseg_1.onnx` | XSeg model 1 — occlusion detection | 67 MB | 256px | binary (face/bg) |
|
| 116 |
+
| `xseg_2.onnx` | XSeg model 2 — occlusion detection | 67 MB | 256px | binary (face/bg) |
|
| 117 |
+
| `xseg_3.onnx` | XSeg model 3 — occlusion detection | 67 MB | 256px | binary (face/bg) |
|
| 118 |
+
|
| 119 |
+
Runtime model selection via IPC: `many` (all 3 intersected), `xseg_1`, `xseg_2`, `xseg_3`.
|
| 120 |
+
Input: NHWC float32 [0,1]. Output: intersection of all selected model masks (most conservative).
|
| 121 |
+
|
| 122 |
+
### BiSeNet — Region Segmentation
|
| 123 |
+
|
| 124 |
+
| Model | Description | Size | Input | Classes |
|
| 125 |
+
|-------|-------------|------|-------|---------|
|
| 126 |
+
| `bisenet_resnet_34.onnx` | BiSeNet ResNet-34 (**default**) | 89 MB | 512px | 19 regions |
|
| 127 |
+
| `bisenet_resnet_18.onnx` | BiSeNet ResNet-18 (lighter) | 51 MB | 512px | 19 regions |
|
| 128 |
+
|
| 129 |
+
Runtime model selection via IPC. Input: NCHW float32 ImageNet-normalized.
|
| 130 |
+
10 configurable face regions: skin, left-eyebrow, right-eyebrow, left-eye, right-eye, glasses, upper-lip, nose, lower-lip, mouth.
|
| 131 |
+
|
| 132 |
+
## TensorRT Engine Cache
|
| 133 |
+
|
| 134 |
+
Pre-compiled TensorRT engines are stored in the `trt_cache/` subfolder, keyed by GPU architecture and software versions. This eliminates cold-start TRT compilation (~180-300s) on new GPU instances.
|
| 135 |
+
|
| 136 |
+
### Layout
|
| 137 |
+
|
| 138 |
+
```
|
| 139 |
+
trt_cache/
|
| 140 |
+
├── sm89/trt10.9_ort1.24/ # RTX 4090 (Ada Lovelace)
|
| 141 |
+
│ ├── manifest.json # Metadata: cache_key, engine list, timestamps
|
| 142 |
+
│ ├── TensorrtExecutionProvider_*.engine # Compiled TRT engines
|
| 143 |
+
│ ├── TensorrtExecutionProvider_*.profile # Shape profiles (dynamic-input shape ranges saved with each engine)
|
| 144 |
+
│ └── TensorrtExecutionProvider_cache_sm89.timing # cuDNN/TRT timing optimization cache
|
| 145 |
+
├── sm86/trt10.9_ort1.24/ # RTX 3090 (Ampere)
|
| 146 |
+
│ └── ...
|
| 147 |
+
└── sm80/trt10.9_ort1.24/ # A100 (Ampere)
|
| 148 |
+
└── ...
|
| 149 |
+
```
|
| 150 |
+
|
| 151 |
+
### Cache Key
|
| 152 |
+
|
| 153 |
+
Format: `{gpu_arch}/trt{trt_version}_ort{ort_version}`
|
| 154 |
+
|
| 155 |
+
| Component | Example | Source |
|
| 156 |
+
|-----------|---------|--------|
|
| 157 |
+
| `gpu_arch` | `sm89` | `nvidia-smi --query-gpu=compute_cap --format=csv,noheader` → `8.9` → `sm89` |
|
| 158 |
+
| `trt_version` | `10.9` | `tensorrt.__version__` major.minor |
|
| 159 |
+
| `ort_version` | `1.24` | `onnxruntime.__version__` major.minor |
|
| 160 |
+
|
| 161 |
+
### Lifecycle
|
| 162 |
+
|
| 163 |
+
1. **Download** — at container boot, GPU Worker checks Hugging Face (HF) for a matching cache key. If one is found, it downloads all engines (~10-30s vs. ~180-300s to compile).
|
| 164 |
+
2. **Compile** — if no cache on HF, ONNX Runtime compiles TRT engines from scratch on first model load.
|
| 165 |
+
3. **Self-seed upload** — after compilation, engines are uploaded to HF so future instances skip compilation.
|
| 166 |
+
4. **Incremental upload** — if engines were downloaded from HF but new models were compiled locally afterwards (e.g., YOLOv8n during warmup), only the new engines are uploaded.
|
| 167 |
+
|
| 168 |
+
### manifest.json
|
| 169 |
+
|
| 170 |
+
```json
|
| 171 |
+
{
|
| 172 |
+
"cache_key": "sm89/trt10.9_ort1.24",
|
| 173 |
+
"gpu_arch": "sm89",
|
| 174 |
+
"trt_version": "10.9",
|
| 175 |
+
"ort_version": "1.24",
|
| 176 |
+
"created_at": "2025-03-07T12:00:00Z",
|
| 177 |
+
"machine_id": "C.12345",
|
| 178 |
+
"engine_files": [
|
| 179 |
+
"TensorrtExecutionProvider_model_hash.engine",
|
| 180 |
+
"TensorrtExecutionProvider_model_hash.profile",
|
| 181 |
+
"timing.cache"
|
| 182 |
+
]
|
| 183 |
+
}
|
| 184 |
+
```
|
| 185 |
+
|
| 186 |
+
The manifest serves as both metadata and an upload gate — its presence signals that the cache was downloaded, and its `engine_files` list enables incremental upload detection.
|
| 187 |
+
|
| 188 |
+
## GFPGAN (optional, not used in real-time)
|
| 189 |
+
|
| 190 |
+
Face restoration and enhancement. Too slow for real-time streaming (~50-150ms per frame).
|
| 191 |
+
|
| 192 |
+
| Model | Description | Size |
|
| 193 |
+
|-------|-------------|------|
|
| 194 |
+
| `gfpgan/GFPGANv1.4.pth` | GFPGAN v1.4 restoration | 332 MB |
|
| 195 |
+
| `gfpgan/weights/detection_Resnet50_Final.pth` | RetinaFace detector | 104 MB |
|
| 196 |
+
| `gfpgan/weights/parsing_parsenet.pth` | ParseNet segmentation | 81 MB |
|
| 197 |
+
|
| 198 |
+
## Usage
|
| 199 |
+
|
| 200 |
+
### GPU Worker (production)
|
| 201 |
+
|
| 202 |
+
Models are baked into the Docker image at build time (buffalo_l + default swap + landmark + mask models). Alternative swap models (HyperSwap) are downloaded on-demand by `ModelDownloadService`.
|
| 203 |
+
|
| 204 |
+
TRT engine cache is downloaded asynchronously at boot via `trt_cache.py` (non-blocking — `/health` responds immediately).
|
| 205 |
+
|
| 206 |
+
```bash
|
| 207 |
+
# Manual download (local development)
|
| 208 |
+
HF_TOKEN=hf_xxx ./scripts/download_models.sh /models
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
### Docker build
|
| 212 |
+
|
| 213 |
+
```bash
|
| 214 |
+
docker build --build-arg HF_TOKEN=hf_xxx -t morphstream-gpu-worker .
|
| 215 |
+
```
|
| 216 |
+
|
| 217 |
+
### Python (huggingface_hub)
|
| 218 |
+
|
| 219 |
+
```python
|
| 220 |
+
from huggingface_hub import hf_hub_download
|
| 221 |
+
|
| 222 |
+
model_path = hf_hub_download(
|
| 223 |
+
repo_id="latark/MorphStream",
|
| 224 |
+
filename="inswapper_128_fp16.onnx",
|
| 225 |
+
token="hf_xxx"
|
| 226 |
+
)
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
## Scripts
|
| 230 |
+
|
| 231 |
+
### convert_scrfd_fp16.py
|
| 232 |
+
|
| 233 |
+
Converts SCRFD det_10g.onnx from FP32 to FP16:
|
| 234 |
+
|
| 235 |
+
```bash
|
| 236 |
+
pip install onnx onnxconverter-common
|
| 237 |
+
python scripts/convert_scrfd_fp16.py \
|
| 238 |
+
--input buffalo_l/det_10g.onnx \
|
| 239 |
+
--output buffalo_l/det_10g_fp16.onnx
|
| 240 |
+
```
|
| 241 |
+
|
| 242 |
+
Key: `op_block_list=['BatchNormalization']` prevents epsilon underflow (1e-5 → 0 in FP16 → NaN).
|
| 243 |
+
|
| 244 |
+
## License
|
| 245 |
+
|
| 246 |
+
MIT License
|
bisenet_resnet_18.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2218b6183c26ca5c83303232d682a536c670c13ea9695f716c777d1f244eefe9
|
| 3 |
+
size 53205356
|
bisenet_resnet_34.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a0b8c958a3c938913bd06a8365dbb3c8761afba6ecbf0d14b3b1f77eb230c96
|
| 3 |
+
size 93632546
|
buffalo_l/1k3d68.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
|
| 3 |
+
size 143607619
|
buffalo_l/2d106det.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
|
| 3 |
+
size 5030888
|
buffalo_l/det_10g.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
|
| 3 |
+
size 16923827
|
buffalo_l/det_10g_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:504a01ae20e82125ef4819fd469886bc267ec7a3681078fc40b4a604f4cd9269
|
| 3 |
+
size 8477747
|
buffalo_l/genderage.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
|
| 3 |
+
size 1322532
|
buffalo_l/w600k_r50.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c06341c33c2ca1f86781dab0e829f88ad5b64be9fba56e56bc9ebdefc619e43
|
| 3 |
+
size 174383860
|
dfl_xseg.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af105ae257170fdbc6a03460327b88d5c0b9a659aa4384fb8686ceead7294ad8
|
| 3 |
+
size 70343569
|
gfpgan/GFPGANv1.4.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2cd4703ab14f4d01fd1383a8a8b266f9a5833dacee8e6a79d3bf21a1b6be5ad
|
| 3 |
+
size 348632874
|
gfpgan/weights/detection_Resnet50_Final.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
|
| 3 |
+
size 109497761
|
gfpgan/weights/parsing_parsenet.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
|
| 3 |
+
size 85331193
|
hyperswap_1a_256.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0e98a8a03a238f461ed3d2570e426b49f46745ee400854a60dceeb70c246add
|
| 3 |
+
size 402742682
|
hyperswap_1b_256.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5124031789c42f71b9558fb71954ef7aedb6da7ed9fac79293e23c61a792a73e
|
| 3 |
+
size 402742682
|
hyperswap_1c_256.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5528c2d76fe9986c99d829278987ef9f3a630cb606db7628d02b57b330f406a5
|
| 3 |
+
size 402742682
|
inswapper_128.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4a3f08c753cb72d04e10aa0f7dbe3deebbf39567d4ead6dce08e98aa49e16af
|
| 3 |
+
size 554253681
|
inswapper_128_fp16.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d51a9278a1f650cffefc18ba53f38bf2769bf4bbff89267822cf72945f8a38b
|
| 3 |
+
size 277680638
|
runtime_kit/cuda-12.9-py312/develop/manifest.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "develop-c3fe843",
|
| 3 |
+
"created_at": "2026-03-16T23:49:03Z",
|
| 4 |
+
"base_image": "vastai/base-image:cuda-12.9-mini-py312",
|
| 5 |
+
"cuda_compat": "12.6-13.1",
|
| 6 |
+
"components": {
|
| 7 |
+
"runtime_kit": {
|
| 8 |
+
"file": "runtime_kit.tar.zst",
|
| 9 |
+
"sha256": "a2d516a767982bdacabce600c4d8c74b677213160c34bee4eca41dfd6a30d8cd",
|
| 10 |
+
"size_bytes": 5117579370
|
| 11 |
+
}
|
| 12 |
+
}
|
| 13 |
+
}
|
runtime_kit/cuda-12.9-py312/develop/models.tar.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99b022f8850a40549fc2eaa04675dbc2840ff3e5277389d12f1a267eccdec04e
|
| 3 |
+
size 976835217
|
runtime_kit/cuda-12.9-py312/develop/runtime_kit.tar.zst
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2d516a767982bdacabce600c4d8c74b677213160c34bee4eca41dfd6a30d8cd
|
| 3 |
+
size 5117579370
|
scripts/convert_scrfd_fp16.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Convert SCRFD det_10g.onnx from FP32 to FP16.
|
| 3 |
+
|
| 4 |
+
Usage:
|
| 5 |
+
pip install onnx onnxconverter-common
|
| 6 |
+
python scripts/convert_scrfd_fp16.py \
|
| 7 |
+
--input /path/to/det_10g.onnx \
|
| 8 |
+
--output /path/to/det_10g_fp16.onnx
|
| 9 |
+
|
| 10 |
+
Design decisions:
|
| 11 |
+
- op_block_list=['BatchNormalization'] — epsilon 1e-5 is below FP16's smallest normal value (~6.1e-5) and may be flushed to 0 → NaN.
|
| 12 |
+
Keeping BN in FP32 prevents this while still converting ~95% of ops to FP16.
|
| 13 |
+
- keep_io_types=True — Input/output remain float32 for compatibility.
|
| 14 |
+
No preprocessing changes needed in SCRFD pipeline.
|
| 15 |
+
- onnx.checker validates structural integrity after conversion.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import argparse
|
| 19 |
+
import sys
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def convert_fp16(input_path: str, output_path: str) -> None:
    """Convert an ONNX model from FP32 to FP16 and save the result.

    BatchNormalization nodes are kept in FP32 (``op_block_list``) and the
    graph inputs/outputs stay float32 (``keep_io_types=True``), so the
    converted model accepts the same input dtype as the original.

    Progress is printed to stdout; error messages are printed to stderr.
    Errors raised by ``onnx.load``, the converter, or
    ``onnx.checker.check_model`` are not caught and propagate to the caller.

    Args:
        input_path: Path to the FP32 ``.onnx`` file; must be an existing
            regular file.
        output_path: Destination for the FP16 model. Missing parent
            directories are created.

    Raises:
        SystemExit: With status 1 when ``input_path`` is not a regular file,
            or when ``onnx`` / ``onnxconverter-common`` cannot be imported.
    """
    input_file = Path(input_path)
    # Validate the path before importing the heavy dependencies.
    # is_file() (instead of exists()) also rejects directories, which would
    # otherwise only fail later inside onnx.load() with an opaque error.
    if not input_file.is_file():
        print(f"Input file not found: {input_path}", file=sys.stderr)
        sys.exit(1)

    # Imported lazily so a missing package yields an install hint instead of
    # a traceback at module import time.
    try:
        import onnx
        from onnxconverter_common import float16
    except ImportError:
        print("Missing dependencies. Install:", file=sys.stderr)
        print(" pip install onnx onnxconverter-common", file=sys.stderr)
        sys.exit(1)

    print(f"Loading {input_path} ...")
    model = onnx.load(input_path)

    input_size_mb = input_file.stat().st_size / (1024 * 1024)
    # Report the default-domain opset. opset_import can list several operator
    # domains in any order (or be empty), so indexing [0] is not reliable.
    opset_version = next(
        (
            entry.version
            for entry in model.opset_import
            if entry.domain in ("", "ai.onnx")
        ),
        "unknown",
    )
    print(f" Input size: {input_size_mb:.1f} MB")
    print(f" Opset version: {opset_version}")

    # BatchNormalization stays in FP32. Stated rationale: its epsilon (1e-5)
    # is below FP16's smallest normal value (~6.1e-5), so it may be flushed
    # to zero, which can turn the normalization into a division by zero → NaN.
    print("Converting to FP16 (excluding BatchNormalization) ...")
    model_fp16 = float16.convert_float_to_float16(
        model,
        op_block_list=["BatchNormalization"],
        keep_io_types=True,
    )

    # Structural validation only; this does not compare numerical outputs.
    print("Validating converted model ...")
    onnx.checker.check_model(model_fp16)

    output_file = Path(output_path)
    output_file.parent.mkdir(parents=True, exist_ok=True)
    onnx.save(model_fp16, output_path)

    output_size_mb = output_file.stat().st_size / (1024 * 1024)
    ratio = output_size_mb / input_size_mb * 100
    print(f" Output size: {output_size_mb:.1f} MB ({ratio:.0f}% of original)")
    print(f" Saved to: {output_path}")
    print("Done.")
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
if __name__ == "__main__":
    # Command-line entry point: both paths are mandatory and are handed
    # straight to convert_fp16().
    cli = argparse.ArgumentParser(
        description="Convert SCRFD det_10g.onnx FP32 → FP16",
    )
    for flag, help_text in (
        ("--input", "Path to FP32 det_10g.onnx"),
        ("--output", "Output path for FP16 model"),
    ):
        cli.add_argument(flag, required=True, help=help_text)
    opts = cli.parse_args()

    convert_fp16(opts.input, opts.output)
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_fp16_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f51d27c083f869b3818be05df14f218ecd44f4b8920943cc14a5cfdc11a24319
|
| 3 |
+
size 51294308
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_fp16_sm89.profile
ADDED
|
Binary file (21 Bytes). View file
|
|
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_8113506114015737638_0_0_fp16_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e4438b7c4f2d2f85b41203911355537091d9fbec0d8f4524698fa2aeae027d3b
|
| 3 |
+
size 10357980
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_fp16_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fee5a0dde13de567891a93125a2ebe96d5a62a1bd5e98f16716bf78a4f71948
|
| 3 |
+
size 31027988
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_fp16_sm89.profile
ADDED
|
Binary file (21 Bytes). View file
|
|
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_fp16_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c53297674775156338551acdc8e22ea51d3c17bd76783d8f45461cfd3b7209c
|
| 3 |
+
size 4299956
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_fp16_sm89.profile
ADDED
|
Binary file (20 Bytes). View file
|
|
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_167800414563226528_0_0_fp16_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb5dc75cdeff4915c75ef732661f99b634b0e4b04a2766f4bf15392d0865c429
|
| 3 |
+
size 57522548
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_fp16_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55d76dfa9db33da731ff0a249d7e696e92d97aef40e296ac4b13c6923b429d9b
|
| 3 |
+
size 9784060
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_fp16_sm89.profile
ADDED
|
Binary file (36 Bytes). View file
|
|
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_fp16_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7424d4aa0a572f18284db645928f4677c522fb98ea1141e9afd1f7463827d9e
|
| 3 |
+
size 90437652
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_fp16_sm89.profile
ADDED
|
Binary file (23 Bytes). View file
|
|
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_TRTKernel_graph_torch_jit_10294570408813474267_0_0_sm89.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e2901b8b90663ac30a7001037447cf171afec18e914267a4b0d7a44477ff3bf
|
| 3 |
+
size 281999132
|
trt_cache/sm89/trt10.9_ort1.24/TensorrtExecutionProvider_cache_sm89.timing
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29bdb94cc5924532a547a58b25994327d063e5ecf424c4eedb98815842f5c88c
|
| 3 |
+
size 24462901
|
trt_cache/sm89/trt10.9_ort1.24/manifest.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cache_key": "sm89/trt10.9_ort1.24",
|
| 3 |
+
"gpu_arch": "sm89",
|
| 4 |
+
"trt_version": "10.9",
|
| 5 |
+
"ort_version": "1.24",
|
| 6 |
+
"created_at": "2026-03-17T01:15:18Z",
|
| 7 |
+
"machine_id": "C.32987251",
|
| 8 |
+
"engine_files": [
|
| 9 |
+
"TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_fp16_sm89.engine",
|
| 10 |
+
"TensorrtExecutionProvider_TRTKernel_graph_main_graph_17833951080834725538_0_0_fp16_sm89.profile",
|
| 11 |
+
"TensorrtExecutionProvider_TRTKernel_graph_main_graph_8113506114015737638_0_0_fp16_sm89.engine",
|
| 12 |
+
"TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_fp16_sm89.engine",
|
| 13 |
+
"TensorrtExecutionProvider_TRTKernel_graph_main_graph_9597588243303553259_0_0_fp16_sm89.profile",
|
| 14 |
+
"TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_fp16_sm89.engine",
|
| 15 |
+
"TensorrtExecutionProvider_TRTKernel_graph_mxnet_converted_model_11107014985443681405_0_0_fp16_sm89.profile",
|
| 16 |
+
"TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_167800414563226528_0_0_fp16_sm89.engine",
|
| 17 |
+
"TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_fp16_sm89.engine",
|
| 18 |
+
"TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_17996549027508307148_0_0_fp16_sm89.profile",
|
| 19 |
+
"TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_fp16_sm89.engine",
|
| 20 |
+
"TensorrtExecutionProvider_TRTKernel_graph_torch-jit-export_2345837266770369871_0_0_fp16_sm89.profile",
|
| 21 |
+
"TensorrtExecutionProvider_TRTKernel_graph_torch_jit_10294570408813474267_0_0_sm89.engine",
|
| 22 |
+
"TensorrtExecutionProvider_cache_sm89.timing"
|
| 23 |
+
]
|
| 24 |
+
}
|
xseg_1.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4d1498b8a03b5fe2a3a5d2ef2a0402ab03bd51edaf5b2d8d5fb764702a97dd3
|
| 3 |
+
size 70324286
|
xseg_2.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd9a0879eaf43841d765472cf1f8c330dbf9dcb03da0eace93e95f3bcc399042
|
| 3 |
+
size 70324286
|
xseg_3.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48ccd7e8541e159a5a754ec9e62df2f12065f7df8f9af842c1750342c6533559
|
| 3 |
+
size 70327709
|
yolov8n.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d767becf196208a6739a25cfcd19819714a6db637fc179acb5890133de9b363b
|
| 3 |
+
size 12851047
|