Upload folder using huggingface_hub
Browse files- .gitattributes +8 -0
- deimv2_dinov3_l_coco.engine +3 -0
- deimv2_dinov3_l_coco.log +320 -0
- deimv2_dinov3_l_coco.onnx +3 -0
- deimv2_dinov3_l_coco.pth +3 -0
- deimv2_dinov3_m_coco.engine +3 -0
- deimv2_dinov3_m_coco.log +364 -0
- deimv2_dinov3_m_coco.onnx +3 -0
- deimv2_dinov3_m_coco.pth +3 -0
- deimv2_dinov3_s_coco.engine +3 -0
- deimv2_dinov3_s_coco.log +408 -0
- deimv2_dinov3_s_coco.onnx +3 -0
- deimv2_dinov3_s_coco.pth +3 -0
- deimv2_dinov3_x_coco.engine +3 -0
- deimv2_dinov3_x_coco.log +294 -0
- deimv2_dinov3_x_coco.onnx +3 -0
- deimv2_dinov3_x_coco.pth +3 -0
- deimv2_hgnetv2_atto_coco.engine +3 -0
- deimv2_hgnetv2_atto_coco.log +586 -0
- deimv2_hgnetv2_atto_coco.onnx +3 -0
- deimv2_hgnetv2_atto_coco.pth +3 -0
- deimv2_hgnetv2_femto_coco.engine +3 -0
- deimv2_hgnetv2_femto_coco.log +590 -0
- deimv2_hgnetv2_femto_coco.onnx +3 -0
- deimv2_hgnetv2_femto_coco.pth +3 -0
- deimv2_hgnetv2_n_coco.engine +3 -0
- deimv2_hgnetv2_n_coco.log +590 -0
- deimv2_hgnetv2_n_coco.onnx +3 -0
- deimv2_hgnetv2_n_coco.pth +3 -0
- deimv2_hgnetv2_pico_coco.engine +3 -0
- deimv2_hgnetv2_pico_coco.log +590 -0
- deimv2_hgnetv2_pico_coco.onnx +3 -0
- deimv2_hgnetv2_pico_coco.pth +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
deimv2_dinov3_l_coco.engine filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
deimv2_dinov3_m_coco.engine filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
deimv2_dinov3_s_coco.engine filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
deimv2_dinov3_x_coco.engine filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
deimv2_hgnetv2_atto_coco.engine filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
deimv2_hgnetv2_femto_coco.engine filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
deimv2_hgnetv2_n_coco.engine filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
deimv2_hgnetv2_pico_coco.engine filter=lfs diff=lfs merge=lfs -text
|
deimv2_dinov3_l_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f488eb8d37c838544afe7422b8d125c40debfe1697e25e03645980529ae48eb7
|
| 3 |
+
size 70274732
|
deimv2_dinov3_l_coco.log
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_l_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_l_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-07:00:14] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-07:00:14] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-07:00:14] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-07:00:14] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-07:00:14] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-07:00:14] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-07:00:14] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-07:00:14] [I] Model: checkpoints/deimv2_dinov3_l_coco.onnx
|
| 10 |
+
[01/20/2026-07:00:14] [I] Output:
|
| 11 |
+
[01/20/2026-07:00:14] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-07:00:14] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-07:00:14] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-07:00:14] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-07:00:14] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-07:00:14] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-07:00:14] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-07:00:14] [I] Calibration:
|
| 19 |
+
[01/20/2026-07:00:14] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-07:00:14] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-07:00:14] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-07:00:14] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-07:00:14] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-07:00:14] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-07:00:14] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-07:00:14] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-07:00:14] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-07:00:14] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-07:00:14] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-07:00:14] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-07:00:14] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-07:00:14] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-07:00:14] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-07:00:14] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-07:00:14] [I] Save engine: checkpoints/deimv2_dinov3_l_coco.engine
|
| 36 |
+
[01/20/2026-07:00:14] [I] Load engine:
|
| 37 |
+
[01/20/2026-07:00:14] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-07:00:14] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-07:00:14] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-07:00:14] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-07:00:14] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-07:00:14] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-07:00:14] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-07:00:14] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-07:00:14] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-07:00:14] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-07:00:14] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-07:00:14] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-07:00:14] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-07:00:14] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-07:00:14] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-07:00:14] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-07:00:14] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-07:00:14] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-07:00:14] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-07:00:14] [I] Input build shape (profile 0): images=1x3x640x640+1x3x640x640+1x3x640x640
|
| 57 |
+
[01/20/2026-07:00:14] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-07:00:14] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-07:00:14] [I] === System Options ===
|
| 60 |
+
[01/20/2026-07:00:14] [I] Device: 0
|
| 61 |
+
[01/20/2026-07:00:14] [I] DLACore:
|
| 62 |
+
[01/20/2026-07:00:14] [I] Plugins:
|
| 63 |
+
[01/20/2026-07:00:14] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-07:00:14] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-07:00:14] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-07:00:14] [I]
|
| 67 |
+
[01/20/2026-07:00:14] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-07:00:14] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-07:00:14] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-07:00:14] [I] Input inference shape : images=1x3x640x640
|
| 71 |
+
[01/20/2026-07:00:14] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-07:00:14] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-07:00:14] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-07:00:14] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-07:00:14] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-07:00:14] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-07:00:14] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-07:00:14] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-07:00:14] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-07:00:14] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-07:00:14] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-07:00:14] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-07:00:14] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-07:00:14] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-07:00:14] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-07:00:14] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-07:00:14] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-07:00:14] [I] Inputs:
|
| 89 |
+
[01/20/2026-07:00:14] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-07:00:14] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-07:00:14] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-07:00:14] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-07:00:14] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-07:00:14] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-07:00:14] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-07:00:14] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-07:00:14] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-07:00:14] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-07:00:14] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-07:00:14] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-07:00:14] [I]
|
| 102 |
+
[01/20/2026-07:00:14] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-07:00:14] [I] Available Devices:
|
| 104 |
+
[01/20/2026-07:00:14] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-07:00:14] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-07:00:14] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-07:00:14] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-07:00:14] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-07:00:14] [I] SMs: 128
|
| 110 |
+
[01/20/2026-07:00:14] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-07:00:14] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-07:00:14] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-07:00:14] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-07:00:14] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-07:00:14] [I]
|
| 116 |
+
[01/20/2026-07:00:14] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-07:00:14] [I]
|
| 118 |
+
[01/20/2026-07:00:14] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-07:00:14] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-07:00:14] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 10549 (MiB)
|
| 121 |
+
[01/20/2026-07:00:14] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-07:00:14] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-07:00:14] [I] [TRT] Input filename: checkpoints/deimv2_dinov3_l_coco.onnx
|
| 124 |
+
[01/20/2026-07:00:14] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-07:00:14] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-07:00:14] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-07:00:14] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-07:00:14] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-07:00:14] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-07:00:14] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-07:00:14] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-07:00:14] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-07:00:14] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-07:00:14] [I] Finished parsing network model. Parse time: 0.181841
|
| 135 |
+
[01/20/2026-07:00:14] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x640x640 OPT=1x3x640x640 MAX=1x3x640x640
|
| 136 |
+
[01/20/2026-07:00:14] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-07:00:15] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +203, GPU +4, now: CPU 661, GPU 10553 (MiB)
|
| 138 |
+
[01/20/2026-07:00:15] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-07:00:15] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-07:00:15] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-07:00:57] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-07:02:46] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-07:02:47] [I] [TRT] Total Host Persistent Memory: 324864 bytes
|
| 144 |
+
[01/20/2026-07:02:47] [I] [TRT] Total Device Persistent Memory: 3072 bytes
|
| 145 |
+
[01/20/2026-07:02:47] [I] [TRT] Max Scratch Memory: 11514880 bytes
|
| 146 |
+
[01/20/2026-07:02:47] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 94 steps to complete.
|
| 147 |
+
[01/20/2026-07:02:47] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 2.83501ms to assign 11 blocks to 94 nodes requiring 28498432 bytes.
|
| 148 |
+
[01/20/2026-07:02:47] [I] [TRT] Total Activation Memory: 28498432 bytes
|
| 149 |
+
[01/20/2026-07:02:47] [I] [TRT] Total Weights Memory: 64819648 bytes
|
| 150 |
+
[01/20/2026-07:02:47] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-07:02:47] [I] [TRT] Engine generation completed in 152.607 seconds.
|
| 152 |
+
[01/20/2026-07:02:47] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 41 MiB, GPU 228 MiB
|
| 153 |
+
[01/20/2026-07:02:47] [I] Created engine with size: 67.0192 MiB
|
| 154 |
+
[01/20/2026-07:02:47] [I] Engine built in 152.978 sec.
|
| 155 |
+
[01/20/2026-07:02:48] [I] [TRT] Loaded engine size: 67 MiB
|
| 156 |
+
[01/20/2026-07:02:48] [I] Engine deserialized in 0.0254528 sec.
|
| 157 |
+
[01/20/2026-07:02:48] [I] [TRT] [MS] Running engine with multi stream info
|
| 158 |
+
[01/20/2026-07:02:48] [I] [TRT] [MS] Number of aux streams is 3
|
| 159 |
+
[01/20/2026-07:02:48] [I] [TRT] [MS] Number of total worker streams is 4
|
| 160 |
+
[01/20/2026-07:02:48] [I] [TRT] [MS] The main stream provided by execute/enqueue calls is the first worker stream
|
| 161 |
+
[01/20/2026-07:02:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +28, now: CPU 0, GPU 89 (MiB)
|
| 162 |
+
[01/20/2026-07:02:48] [I] Setting persistentCacheLimit to 0 bytes.
|
| 163 |
+
[01/20/2026-07:02:48] [I] Created execution context with device memory size: 27.1782 MiB
|
| 164 |
+
[01/20/2026-07:02:48] [I] Using random values for input images
|
| 165 |
+
[01/20/2026-07:02:48] [I] Input binding for images with dimensions 1x3x640x640 is created.
|
| 166 |
+
[01/20/2026-07:02:48] [I] Using random values for input orig_target_sizes
|
| 167 |
+
[01/20/2026-07:02:48] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 168 |
+
[01/20/2026-07:02:48] [I] Output binding for labels with dimensions 1x300 is created.
|
| 169 |
+
[01/20/2026-07:02:48] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 170 |
+
[01/20/2026-07:02:48] [I] Output binding for scores with dimensions 1x300 is created.
|
| 171 |
+
[01/20/2026-07:02:48] [I] Starting inference
|
| 172 |
+
[01/20/2026-07:02:51] [I] Warmup completed 88 queries over 200 ms
|
| 173 |
+
[01/20/2026-07:02:51] [I] Timing trace has 1317 queries over 3.00591 s
|
| 174 |
+
[01/20/2026-07:02:51] [I]
|
| 175 |
+
[01/20/2026-07:02:51] [I] === Trace details ===
|
| 176 |
+
[01/20/2026-07:02:51] [I] Trace averages of 10 runs:
|
| 177 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27098 ms - Host latency: 2.50865 ms (enqueue 0.514394 ms)
|
| 178 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.26981 ms - Host latency: 2.50693 ms (enqueue 0.489441 ms)
|
| 179 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27171 ms - Host latency: 2.50977 ms (enqueue 0.487265 ms)
|
| 180 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27035 ms - Host latency: 2.50779 ms (enqueue 0.505414 ms)
|
| 181 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.26972 ms - Host latency: 2.50679 ms (enqueue 0.497726 ms)
|
| 182 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.26867 ms - Host latency: 2.50542 ms (enqueue 0.509586 ms)
|
| 183 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27041 ms - Host latency: 2.50663 ms (enqueue 0.510934 ms)
|
| 184 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.26755 ms - Host latency: 2.50529 ms (enqueue 0.515619 ms)
|
| 185 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27157 ms - Host latency: 2.50828 ms (enqueue 0.504404 ms)
|
| 186 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2727 ms - Host latency: 2.51041 ms (enqueue 0.495471 ms)
|
| 187 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27363 ms - Host latency: 2.51047 ms (enqueue 0.47641 ms)
|
| 188 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27285 ms - Host latency: 2.50984 ms (enqueue 0.462106 ms)
|
| 189 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27835 ms - Host latency: 2.51419 ms (enqueue 0.711786 ms)
|
| 190 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28509 ms - Host latency: 2.51961 ms (enqueue 0.892325 ms)
|
| 191 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27975 ms - Host latency: 2.51673 ms (enqueue 0.472803 ms)
|
| 192 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27742 ms - Host latency: 2.5144 ms (enqueue 0.472925 ms)
|
| 193 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27742 ms - Host latency: 2.51455 ms (enqueue 0.474133 ms)
|
| 194 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28213 ms - Host latency: 2.51862 ms (enqueue 0.478882 ms)
|
| 195 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28171 ms - Host latency: 2.51753 ms (enqueue 0.511072 ms)
|
| 196 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2776 ms - Host latency: 2.51526 ms (enqueue 0.491754 ms)
|
| 197 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28048 ms - Host latency: 2.51752 ms (enqueue 0.478693 ms)
|
| 198 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27651 ms - Host latency: 2.5116 ms (enqueue 0.491394 ms)
|
| 199 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28107 ms - Host latency: 2.51829 ms (enqueue 0.463324 ms)
|
| 200 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28213 ms - Host latency: 2.51918 ms (enqueue 0.464856 ms)
|
| 201 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28241 ms - Host latency: 2.51914 ms (enqueue 0.462903 ms)
|
| 202 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27897 ms - Host latency: 2.51638 ms (enqueue 0.470313 ms)
|
| 203 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28048 ms - Host latency: 2.51809 ms (enqueue 0.462689 ms)
|
| 204 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28009 ms - Host latency: 2.51761 ms (enqueue 0.471527 ms)
|
| 205 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27842 ms - Host latency: 2.51548 ms (enqueue 0.467664 ms)
|
| 206 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27874 ms - Host latency: 2.51558 ms (enqueue 0.465485 ms)
|
| 207 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27916 ms - Host latency: 2.51591 ms (enqueue 0.47912 ms)
|
| 208 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28259 ms - Host latency: 2.5198 ms (enqueue 0.462988 ms)
|
| 209 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27903 ms - Host latency: 2.51617 ms (enqueue 0.466962 ms)
|
| 210 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28026 ms - Host latency: 2.51755 ms (enqueue 0.462415 ms)
|
| 211 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27534 ms - Host latency: 2.51021 ms (enqueue 0.945868 ms)
|
| 212 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28122 ms - Host latency: 2.51689 ms (enqueue 0.970319 ms)
|
| 213 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27754 ms - Host latency: 2.51541 ms (enqueue 0.486304 ms)
|
| 214 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27487 ms - Host latency: 2.50981 ms (enqueue 0.515942 ms)
|
| 215 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28162 ms - Host latency: 2.51888 ms (enqueue 0.479968 ms)
|
| 216 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27969 ms - Host latency: 2.51653 ms (enqueue 0.470239 ms)
|
| 217 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28187 ms - Host latency: 2.51904 ms (enqueue 0.465942 ms)
|
| 218 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28409 ms - Host latency: 2.52174 ms (enqueue 0.474292 ms)
|
| 219 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27986 ms - Host latency: 2.51709 ms (enqueue 0.485034 ms)
|
| 220 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27975 ms - Host latency: 2.51683 ms (enqueue 0.472192 ms)
|
| 221 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28236 ms - Host latency: 2.52018 ms (enqueue 0.474341 ms)
|
| 222 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28416 ms - Host latency: 2.52128 ms (enqueue 0.463013 ms)
|
| 223 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28044 ms - Host latency: 2.5171 ms (enqueue 0.48208 ms)
|
| 224 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27992 ms - Host latency: 2.51689 ms (enqueue 0.47168 ms)
|
| 225 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28152 ms - Host latency: 2.51903 ms (enqueue 0.465649 ms)
|
| 226 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27975 ms - Host latency: 2.51759 ms (enqueue 0.469043 ms)
|
| 227 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27831 ms - Host latency: 2.51587 ms (enqueue 0.467981 ms)
|
| 228 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27915 ms - Host latency: 2.51582 ms (enqueue 0.462842 ms)
|
| 229 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28033 ms - Host latency: 2.51736 ms (enqueue 0.466785 ms)
|
| 230 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27895 ms - Host latency: 2.51575 ms (enqueue 0.464966 ms)
|
| 231 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2793 ms - Host latency: 2.51639 ms (enqueue 0.487378 ms)
|
| 232 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27976 ms - Host latency: 2.51681 ms (enqueue 0.487915 ms)
|
| 233 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28069 ms - Host latency: 2.51779 ms (enqueue 0.482727 ms)
|
| 234 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27924 ms - Host latency: 2.51631 ms (enqueue 0.477234 ms)
|
| 235 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28 ms - Host latency: 2.51755 ms (enqueue 0.469946 ms)
|
| 236 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28087 ms - Host latency: 2.51851 ms (enqueue 0.471277 ms)
|
| 237 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27739 ms - Host latency: 2.51417 ms (enqueue 0.469543 ms)
|
| 238 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28029 ms - Host latency: 2.51709 ms (enqueue 0.471484 ms)
|
| 239 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27944 ms - Host latency: 2.51632 ms (enqueue 0.469019 ms)
|
| 240 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28044 ms - Host latency: 2.51754 ms (enqueue 0.473718 ms)
|
| 241 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28026 ms - Host latency: 2.51804 ms (enqueue 0.471216 ms)
|
| 242 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27955 ms - Host latency: 2.51742 ms (enqueue 0.474963 ms)
|
| 243 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28026 ms - Host latency: 2.51748 ms (enqueue 0.468542 ms)
|
| 244 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28252 ms - Host latency: 2.5196 ms (enqueue 0.474548 ms)
|
| 245 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28036 ms - Host latency: 2.51754 ms (enqueue 0.481628 ms)
|
| 246 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28208 ms - Host latency: 2.51941 ms (enqueue 0.520898 ms)
|
| 247 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27975 ms - Host latency: 2.51682 ms (enqueue 0.470752 ms)
|
| 248 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27847 ms - Host latency: 2.51559 ms (enqueue 0.468323 ms)
|
| 249 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28071 ms - Host latency: 2.51849 ms (enqueue 0.466699 ms)
|
| 250 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28152 ms - Host latency: 2.51973 ms (enqueue 0.472534 ms)
|
| 251 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28058 ms - Host latency: 2.51722 ms (enqueue 0.483984 ms)
|
| 252 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27976 ms - Host latency: 2.51708 ms (enqueue 0.467993 ms)
|
| 253 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27767 ms - Host latency: 2.51388 ms (enqueue 0.476794 ms)
|
| 254 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2778 ms - Host latency: 2.51489 ms (enqueue 0.467603 ms)
|
| 255 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27708 ms - Host latency: 2.51376 ms (enqueue 0.466467 ms)
|
| 256 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27933 ms - Host latency: 2.51658 ms (enqueue 0.469104 ms)
|
| 257 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27866 ms - Host latency: 2.51587 ms (enqueue 0.467322 ms)
|
| 258 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27915 ms - Host latency: 2.51628 ms (enqueue 0.467969 ms)
|
| 259 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28174 ms - Host latency: 2.51907 ms (enqueue 0.469824 ms)
|
| 260 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27839 ms - Host latency: 2.51562 ms (enqueue 0.484106 ms)
|
| 261 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.278 ms - Host latency: 2.5147 ms (enqueue 0.479224 ms)
|
| 262 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27961 ms - Host latency: 2.51604 ms (enqueue 0.467261 ms)
|
| 263 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27957 ms - Host latency: 2.51694 ms (enqueue 0.467187 ms)
|
| 264 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2801 ms - Host latency: 2.5177 ms (enqueue 0.470972 ms)
|
| 265 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28098 ms - Host latency: 2.51821 ms (enqueue 0.471387 ms)
|
| 266 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28098 ms - Host latency: 2.51853 ms (enqueue 0.469434 ms)
|
| 267 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27847 ms - Host latency: 2.51536 ms (enqueue 0.461865 ms)
|
| 268 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27966 ms - Host latency: 2.51677 ms (enqueue 0.462427 ms)
|
| 269 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2783 ms - Host latency: 2.51543 ms (enqueue 0.464453 ms)
|
| 270 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27996 ms - Host latency: 2.51707 ms (enqueue 0.464673 ms)
|
| 271 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28093 ms - Host latency: 2.51807 ms (enqueue 0.465015 ms)
|
| 272 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28101 ms - Host latency: 2.51799 ms (enqueue 0.462305 ms)
|
| 273 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27974 ms - Host latency: 2.51714 ms (enqueue 0.462427 ms)
|
| 274 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27922 ms - Host latency: 2.5166 ms (enqueue 0.461841 ms)
|
| 275 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28093 ms - Host latency: 2.51829 ms (enqueue 0.46416 ms)
|
| 276 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27793 ms - Host latency: 2.51472 ms (enqueue 0.463647 ms)
|
| 277 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28198 ms - Host latency: 2.51899 ms (enqueue 0.462158 ms)
|
| 278 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28027 ms - Host latency: 2.51748 ms (enqueue 0.460986 ms)
|
| 279 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27988 ms - Host latency: 2.51738 ms (enqueue 0.464087 ms)
|
| 280 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27881 ms - Host latency: 2.51575 ms (enqueue 0.462378 ms)
|
| 281 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28162 ms - Host latency: 2.51948 ms (enqueue 0.4625 ms)
|
| 282 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27791 ms - Host latency: 2.51541 ms (enqueue 0.463867 ms)
|
| 283 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27837 ms - Host latency: 2.51519 ms (enqueue 0.482617 ms)
|
| 284 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27996 ms - Host latency: 2.51726 ms (enqueue 0.474951 ms)
|
| 285 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28027 ms - Host latency: 2.5176 ms (enqueue 0.518579 ms)
|
| 286 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27678 ms - Host latency: 2.51377 ms (enqueue 0.488623 ms)
|
| 287 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28098 ms - Host latency: 2.51797 ms (enqueue 0.466943 ms)
|
| 288 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28201 ms - Host latency: 2.51987 ms (enqueue 0.465771 ms)
|
| 289 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28147 ms - Host latency: 2.51907 ms (enqueue 0.465332 ms)
|
| 290 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28022 ms - Host latency: 2.51785 ms (enqueue 0.467163 ms)
|
| 291 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27856 ms - Host latency: 2.51587 ms (enqueue 0.46665 ms)
|
| 292 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28 ms - Host latency: 2.51689 ms (enqueue 0.465576 ms)
|
| 293 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27883 ms - Host latency: 2.51609 ms (enqueue 0.461475 ms)
|
| 294 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27991 ms - Host latency: 2.51729 ms (enqueue 0.468408 ms)
|
| 295 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28022 ms - Host latency: 2.51604 ms (enqueue 0.584424 ms)
|
| 296 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28245 ms - Host latency: 2.52 ms (enqueue 0.497681 ms)
|
| 297 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27969 ms - Host latency: 2.51719 ms (enqueue 0.50918 ms)
|
| 298 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27993 ms - Host latency: 2.51719 ms (enqueue 0.491577 ms)
|
| 299 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27698 ms - Host latency: 2.51423 ms (enqueue 0.483911 ms)
|
| 300 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27908 ms - Host latency: 2.51599 ms (enqueue 0.484497 ms)
|
| 301 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2804 ms - Host latency: 2.51816 ms (enqueue 0.494702 ms)
|
| 302 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28179 ms - Host latency: 2.52053 ms (enqueue 0.512622 ms)
|
| 303 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.2801 ms - Host latency: 2.51814 ms (enqueue 0.523242 ms)
|
| 304 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28154 ms - Host latency: 2.51975 ms (enqueue 0.501465 ms)
|
| 305 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.27942 ms - Host latency: 2.51643 ms (enqueue 0.509082 ms)
|
| 306 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28088 ms - Host latency: 2.51797 ms (enqueue 0.476733 ms)
|
| 307 |
+
[01/20/2026-07:02:51] [I] Average on 10 runs - GPU latency: 2.28118 ms - Host latency: 2.51819 ms (enqueue 0.477466 ms)
|
| 308 |
+
[01/20/2026-07:02:51] [I]
|
| 309 |
+
[01/20/2026-07:02:51] [I] === Performance summary ===
|
| 310 |
+
[01/20/2026-07:02:51] [I] Throughput: 438.136 qps
|
| 311 |
+
[01/20/2026-07:02:51] [I] Latency: min = 2.36743 ms, max = 2.53223 ms, mean = 2.5161 ms, median = 2.51636 ms, percentile(90%) = 2.52319 ms, percentile(95%) = 2.5249 ms, percentile(99%) = 2.52808 ms
|
| 312 |
+
[01/20/2026-07:02:51] [I] Enqueue Time: min = 0.45752 ms, max = 1.16473 ms, mean = 0.49036 ms, median = 0.469604 ms, percentile(90%) = 0.513702 ms, percentile(95%) = 0.534668 ms, percentile(99%) = 1.04712 ms
|
| 313 |
+
[01/20/2026-07:02:51] [I] H2D Latency: min = 0.224976 ms, max = 0.241211 ms, mean = 0.231911 ms, median = 0.232056 ms, percentile(90%) = 0.233154 ms, percentile(95%) = 0.233643 ms, percentile(99%) = 0.234741 ms
|
| 314 |
+
[01/20/2026-07:02:51] [I] GPU Compute Time: min = 2.1311 ms, max = 2.2937 ms, mean = 2.27899 ms, median = 2.27942 ms, percentile(90%) = 2.28558 ms, percentile(95%) = 2.2876 ms, percentile(99%) = 2.29004 ms
|
| 315 |
+
[01/20/2026-07:02:51] [I] D2H Latency: min = 0.00390625 ms, max = 0.00720215 ms, mean = 0.00520047 ms, median = 0.00515747 ms, percentile(90%) = 0.0057373 ms, percentile(95%) = 0.00585938 ms, percentile(99%) = 0.00610352 ms
|
| 316 |
+
[01/20/2026-07:02:51] [I] Total Host Walltime: 3.00591 s
|
| 317 |
+
[01/20/2026-07:02:51] [I] Total GPU Compute Time: 3.00143 s
|
| 318 |
+
[01/20/2026-07:02:51] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 319 |
+
[01/20/2026-07:02:51] [I]
|
| 320 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_l_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_l_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_dinov3_l_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7b134586ec5bb2fd32238be2b3270452741f00794f721a11545dc083f4918cd
|
| 3 |
+
size 130039918
|
deimv2_dinov3_l_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54ec9c9a0f1c16e958fde04ffebb4766061fbf5312e86f80090058c82dcfa183
|
| 3 |
+
size 130815976
|
deimv2_dinov3_m_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:238a28ccdde1ec2198a6e3ed6fafbaf64661efe7fb16378efc86d5694acbae6c
|
| 3 |
+
size 43787852
|
deimv2_dinov3_m_coco.log
ADDED
|
@@ -0,0 +1,364 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_m_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_m_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-06:57:43] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-06:57:43] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-06:57:43] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-06:57:43] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-06:57:43] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-06:57:43] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-06:57:43] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-06:57:43] [I] Model: checkpoints/deimv2_dinov3_m_coco.onnx
|
| 10 |
+
[01/20/2026-06:57:43] [I] Output:
|
| 11 |
+
[01/20/2026-06:57:43] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-06:57:43] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-06:57:43] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-06:57:43] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-06:57:43] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-06:57:43] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-06:57:43] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-06:57:43] [I] Calibration:
|
| 19 |
+
[01/20/2026-06:57:43] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-06:57:43] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-06:57:43] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-06:57:43] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-06:57:43] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-06:57:43] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-06:57:43] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-06:57:43] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-06:57:43] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-06:57:43] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-06:57:43] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-06:57:43] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-06:57:43] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-06:57:43] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-06:57:43] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-06:57:43] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-06:57:43] [I] Save engine: checkpoints/deimv2_dinov3_m_coco.engine
|
| 36 |
+
[01/20/2026-06:57:43] [I] Load engine:
|
| 37 |
+
[01/20/2026-06:57:43] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-06:57:43] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-06:57:43] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-06:57:43] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-06:57:43] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-06:57:43] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-06:57:43] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-06:57:43] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-06:57:43] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-06:57:43] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-06:57:43] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-06:57:43] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-06:57:43] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-06:57:43] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-06:57:43] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-06:57:43] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-06:57:43] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-06:57:43] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-06:57:43] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-06:57:43] [I] Input build shape (profile 0): images=1x3x640x640+1x3x640x640+1x3x640x640
|
| 57 |
+
[01/20/2026-06:57:43] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-06:57:43] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-06:57:43] [I] === System Options ===
|
| 60 |
+
[01/20/2026-06:57:43] [I] Device: 0
|
| 61 |
+
[01/20/2026-06:57:43] [I] DLACore:
|
| 62 |
+
[01/20/2026-06:57:43] [I] Plugins:
|
| 63 |
+
[01/20/2026-06:57:43] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-06:57:43] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-06:57:43] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-06:57:43] [I]
|
| 67 |
+
[01/20/2026-06:57:43] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-06:57:43] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-06:57:43] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-06:57:43] [I] Input inference shape : images=1x3x640x640
|
| 71 |
+
[01/20/2026-06:57:43] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-06:57:43] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-06:57:43] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-06:57:43] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-06:57:43] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-06:57:43] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-06:57:43] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-06:57:43] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-06:57:43] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-06:57:43] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-06:57:43] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-06:57:43] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-06:57:43] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-06:57:43] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-06:57:43] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-06:57:43] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-06:57:43] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-06:57:43] [I] Inputs:
|
| 89 |
+
[01/20/2026-06:57:43] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-06:57:43] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-06:57:43] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-06:57:43] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-06:57:43] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-06:57:43] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-06:57:43] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-06:57:43] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-06:57:43] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-06:57:43] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-06:57:43] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-06:57:43] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-06:57:43] [I]
|
| 102 |
+
[01/20/2026-06:57:43] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-06:57:43] [I] Available Devices:
|
| 104 |
+
[01/20/2026-06:57:43] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-06:57:43] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-06:57:43] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-06:57:43] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-06:57:43] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-06:57:43] [I] SMs: 128
|
| 110 |
+
[01/20/2026-06:57:43] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-06:57:43] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-06:57:43] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-06:57:43] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-06:57:43] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-06:57:43] [I]
|
| 116 |
+
[01/20/2026-06:57:43] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-06:57:43] [I]
|
| 118 |
+
[01/20/2026-06:57:43] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-06:57:43] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-06:57:43] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 10549 (MiB)
|
| 121 |
+
[01/20/2026-06:57:44] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-06:57:44] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-06:57:44] [I] [TRT] Input filename: checkpoints/deimv2_dinov3_m_coco.onnx
|
| 124 |
+
[01/20/2026-06:57:44] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-06:57:44] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-06:57:44] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-06:57:44] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-06:57:44] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-06:57:44] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-06:57:44] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-06:57:44] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-06:57:44] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-06:57:44] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-06:57:44] [I] Finished parsing network model. Parse time: 0.111049
|
| 135 |
+
[01/20/2026-06:57:44] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x640x640 OPT=1x3x640x640 MAX=1x3x640x640
|
| 136 |
+
[01/20/2026-06:57:44] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-06:57:44] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +203, GPU +4, now: CPU 603, GPU 10553 (MiB)
|
| 138 |
+
[01/20/2026-06:57:44] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-06:57:44] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-06:57:44] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-06:58:27] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-07:00:09] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-07:00:10] [I] [TRT] Total Host Persistent Memory: 325824 bytes
|
| 144 |
+
[01/20/2026-07:00:10] [I] [TRT] Total Device Persistent Memory: 3072 bytes
|
| 145 |
+
[01/20/2026-07:00:10] [I] [TRT] Max Scratch Memory: 12826624 bytes
|
| 146 |
+
[01/20/2026-07:00:10] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 115 steps to complete.
|
| 147 |
+
[01/20/2026-07:00:10] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 4.27138ms to assign 11 blocks to 115 nodes requiring 27832320 bytes.
|
| 148 |
+
[01/20/2026-07:00:10] [I] [TRT] Total Activation Memory: 27832320 bytes
|
| 149 |
+
[01/20/2026-07:00:10] [I] [TRT] Total Weights Memory: 36877312 bytes
|
| 150 |
+
[01/20/2026-07:00:10] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-07:00:10] [I] [TRT] Engine generation completed in 146.199 seconds.
|
| 152 |
+
[01/20/2026-07:00:10] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 22 MiB, GPU 133 MiB
|
| 153 |
+
[01/20/2026-07:00:10] [I] Created engine with size: 41.7593 MiB
|
| 154 |
+
[01/20/2026-07:00:10] [I] Engine built in 146.523 sec.
|
| 155 |
+
[01/20/2026-07:00:10] [I] [TRT] Loaded engine size: 41 MiB
|
| 156 |
+
[01/20/2026-07:00:10] [I] Engine deserialized in 0.0202745 sec.
|
| 157 |
+
[01/20/2026-07:00:10] [I] [TRT] [MS] Running engine with multi stream info
|
| 158 |
+
[01/20/2026-07:00:10] [I] [TRT] [MS] Number of aux streams is 2
|
| 159 |
+
[01/20/2026-07:00:10] [I] [TRT] [MS] Number of total worker streams is 3
|
| 160 |
+
[01/20/2026-07:00:10] [I] [TRT] [MS] The main stream provided by execute/enqueue calls is the first worker stream
|
| 161 |
+
[01/20/2026-07:00:11] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +26, now: CPU 0, GPU 61 (MiB)
|
| 162 |
+
[01/20/2026-07:00:11] [I] Setting persistentCacheLimit to 0 bytes.
|
| 163 |
+
[01/20/2026-07:00:11] [I] Created execution context with device memory size: 26.543 MiB
|
| 164 |
+
[01/20/2026-07:00:11] [I] Using random values for input images
|
| 165 |
+
[01/20/2026-07:00:11] [I] Input binding for images with dimensions 1x3x640x640 is created.
|
| 166 |
+
[01/20/2026-07:00:11] [I] Using random values for input orig_target_sizes
|
| 167 |
+
[01/20/2026-07:00:11] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 168 |
+
[01/20/2026-07:00:11] [I] Output binding for labels with dimensions 1x300 is created.
|
| 169 |
+
[01/20/2026-07:00:11] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 170 |
+
[01/20/2026-07:00:11] [I] Output binding for scores with dimensions 1x300 is created.
|
| 171 |
+
[01/20/2026-07:00:11] [I] Starting inference
|
| 172 |
+
[01/20/2026-07:00:14] [I] Warmup completed 116 queries over 200 ms
|
| 173 |
+
[01/20/2026-07:00:14] [I] Timing trace has 1755 queries over 3.00676 s
|
| 174 |
+
[01/20/2026-07:00:14] [I]
|
| 175 |
+
[01/20/2026-07:00:14] [I] === Trace details ===
|
| 176 |
+
[01/20/2026-07:00:14] [I] Trace averages of 10 runs:
|
| 177 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71065 ms - Host latency: 1.93786 ms (enqueue 0.492282 ms)
|
| 178 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7107 ms - Host latency: 1.93874 ms (enqueue 0.488414 ms)
|
| 179 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70995 ms - Host latency: 1.93733 ms (enqueue 0.526463 ms)
|
| 180 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71133 ms - Host latency: 1.93822 ms (enqueue 0.525546 ms)
|
| 181 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70973 ms - Host latency: 1.93734 ms (enqueue 0.530426 ms)
|
| 182 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71135 ms - Host latency: 1.93613 ms (enqueue 0.549188 ms)
|
| 183 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71086 ms - Host latency: 1.93573 ms (enqueue 0.528625 ms)
|
| 184 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71043 ms - Host latency: 1.9369 ms (enqueue 0.49935 ms)
|
| 185 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7101 ms - Host latency: 1.9375 ms (enqueue 0.497095 ms)
|
| 186 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7111 ms - Host latency: 1.93745 ms (enqueue 0.503442 ms)
|
| 187 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71066 ms - Host latency: 1.93687 ms (enqueue 0.492407 ms)
|
| 188 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71055 ms - Host latency: 1.93765 ms (enqueue 0.490912 ms)
|
| 189 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71219 ms - Host latency: 1.94001 ms (enqueue 0.498041 ms)
|
| 190 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71031 ms - Host latency: 1.93792 ms (enqueue 0.491391 ms)
|
| 191 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71117 ms - Host latency: 1.93779 ms (enqueue 0.498175 ms)
|
| 192 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71083 ms - Host latency: 1.93638 ms (enqueue 0.86391 ms)
|
| 193 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71071 ms - Host latency: 1.93495 ms (enqueue 0.834747 ms)
|
| 194 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71078 ms - Host latency: 1.93781 ms (enqueue 0.491089 ms)
|
| 195 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71065 ms - Host latency: 1.93792 ms (enqueue 0.489108 ms)
|
| 196 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70977 ms - Host latency: 1.93677 ms (enqueue 0.490356 ms)
|
| 197 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71003 ms - Host latency: 1.93786 ms (enqueue 0.485187 ms)
|
| 198 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71058 ms - Host latency: 1.93816 ms (enqueue 0.488672 ms)
|
| 199 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71141 ms - Host latency: 1.93936 ms (enqueue 0.489832 ms)
|
| 200 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71136 ms - Host latency: 1.93969 ms (enqueue 0.48515 ms)
|
| 201 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71125 ms - Host latency: 1.93917 ms (enqueue 0.574927 ms)
|
| 202 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71116 ms - Host latency: 1.93246 ms (enqueue 1.08856 ms)
|
| 203 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71023 ms - Host latency: 1.93638 ms (enqueue 0.562469 ms)
|
| 204 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70984 ms - Host latency: 1.93774 ms (enqueue 0.48985 ms)
|
| 205 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71052 ms - Host latency: 1.93808 ms (enqueue 0.497687 ms)
|
| 206 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71116 ms - Host latency: 1.93878 ms (enqueue 0.501562 ms)
|
| 207 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71047 ms - Host latency: 1.93845 ms (enqueue 0.493317 ms)
|
| 208 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71025 ms - Host latency: 1.93754 ms (enqueue 0.496393 ms)
|
| 209 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71062 ms - Host latency: 1.93603 ms (enqueue 0.518878 ms)
|
| 210 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71017 ms - Host latency: 1.93625 ms (enqueue 0.53186 ms)
|
| 211 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70995 ms - Host latency: 1.93616 ms (enqueue 0.539484 ms)
|
| 212 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71107 ms - Host latency: 1.93729 ms (enqueue 0.537775 ms)
|
| 213 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70984 ms - Host latency: 1.9377 ms (enqueue 0.519049 ms)
|
| 214 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71083 ms - Host latency: 1.93759 ms (enqueue 0.531195 ms)
|
| 215 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71141 ms - Host latency: 1.93752 ms (enqueue 0.516901 ms)
|
| 216 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71041 ms - Host latency: 1.93815 ms (enqueue 0.491235 ms)
|
| 217 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71097 ms - Host latency: 1.93666 ms (enqueue 0.488153 ms)
|
| 218 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71036 ms - Host latency: 1.93724 ms (enqueue 0.486969 ms)
|
| 219 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7112 ms - Host latency: 1.93932 ms (enqueue 0.4922 ms)
|
| 220 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71155 ms - Host latency: 1.93663 ms (enqueue 0.522638 ms)
|
| 221 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71089 ms - Host latency: 1.93691 ms (enqueue 0.641669 ms)
|
| 222 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71083 ms - Host latency: 1.93942 ms (enqueue 0.48739 ms)
|
| 223 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71106 ms - Host latency: 1.93726 ms (enqueue 0.808691 ms)
|
| 224 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71108 ms - Host latency: 1.93253 ms (enqueue 1.08926 ms)
|
| 225 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7103 ms - Host latency: 1.93208 ms (enqueue 1.10594 ms)
|
| 226 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70994 ms - Host latency: 1.93262 ms (enqueue 1.06667 ms)
|
| 227 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71105 ms - Host latency: 1.93856 ms (enqueue 0.528882 ms)
|
| 228 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7103 ms - Host latency: 1.93604 ms (enqueue 0.518848 ms)
|
| 229 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71063 ms - Host latency: 1.93771 ms (enqueue 0.505676 ms)
|
| 230 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70941 ms - Host latency: 1.93557 ms (enqueue 0.498169 ms)
|
| 231 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71129 ms - Host latency: 1.93687 ms (enqueue 0.499194 ms)
|
| 232 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70959 ms - Host latency: 1.93779 ms (enqueue 0.492566 ms)
|
| 233 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71169 ms - Host latency: 1.93917 ms (enqueue 0.485571 ms)
|
| 234 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71071 ms - Host latency: 1.93884 ms (enqueue 0.483264 ms)
|
| 235 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71002 ms - Host latency: 1.93765 ms (enqueue 0.482495 ms)
|
| 236 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71123 ms - Host latency: 1.93873 ms (enqueue 0.502905 ms)
|
| 237 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71139 ms - Host latency: 1.93827 ms (enqueue 0.491675 ms)
|
| 238 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71141 ms - Host latency: 1.93834 ms (enqueue 0.504932 ms)
|
| 239 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71006 ms - Host latency: 1.93832 ms (enqueue 0.490552 ms)
|
| 240 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70994 ms - Host latency: 1.93782 ms (enqueue 0.504089 ms)
|
| 241 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7106 ms - Host latency: 1.93896 ms (enqueue 0.49657 ms)
|
| 242 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70969 ms - Host latency: 1.93649 ms (enqueue 0.491089 ms)
|
| 243 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7103 ms - Host latency: 1.93707 ms (enqueue 0.486182 ms)
|
| 244 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71001 ms - Host latency: 1.93781 ms (enqueue 0.489612 ms)
|
| 245 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71102 ms - Host latency: 1.93817 ms (enqueue 0.489429 ms)
|
| 246 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71095 ms - Host latency: 1.93896 ms (enqueue 0.487207 ms)
|
| 247 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71069 ms - Host latency: 1.93865 ms (enqueue 0.488501 ms)
|
| 248 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71012 ms - Host latency: 1.93799 ms (enqueue 0.488086 ms)
|
| 249 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71158 ms - Host latency: 1.93878 ms (enqueue 0.483508 ms)
|
| 250 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71119 ms - Host latency: 1.93899 ms (enqueue 0.484241 ms)
|
| 251 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71101 ms - Host latency: 1.93909 ms (enqueue 0.48418 ms)
|
| 252 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71003 ms - Host latency: 1.93771 ms (enqueue 0.483081 ms)
|
| 253 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71099 ms - Host latency: 1.93904 ms (enqueue 0.483838 ms)
|
| 254 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71007 ms - Host latency: 1.93698 ms (enqueue 0.509509 ms)
|
| 255 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71095 ms - Host latency: 1.93821 ms (enqueue 0.505933 ms)
|
| 256 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70959 ms - Host latency: 1.93735 ms (enqueue 0.506885 ms)
|
| 257 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71138 ms - Host latency: 1.939 ms (enqueue 0.510547 ms)
|
| 258 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71041 ms - Host latency: 1.93723 ms (enqueue 0.51355 ms)
|
| 259 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71085 ms - Host latency: 1.93929 ms (enqueue 0.492822 ms)
|
| 260 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71029 ms - Host latency: 1.93735 ms (enqueue 0.483582 ms)
|
| 261 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71025 ms - Host latency: 1.93787 ms (enqueue 0.487 ms)
|
| 262 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71088 ms - Host latency: 1.93818 ms (enqueue 0.494678 ms)
|
| 263 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71042 ms - Host latency: 1.93789 ms (enqueue 0.492749 ms)
|
| 264 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71086 ms - Host latency: 1.93881 ms (enqueue 0.4974 ms)
|
| 265 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71084 ms - Host latency: 1.93889 ms (enqueue 0.493628 ms)
|
| 266 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71056 ms - Host latency: 1.93756 ms (enqueue 0.491797 ms)
|
| 267 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70963 ms - Host latency: 1.93721 ms (enqueue 0.489026 ms)
|
| 268 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71068 ms - Host latency: 1.93844 ms (enqueue 0.493164 ms)
|
| 269 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71007 ms - Host latency: 1.93776 ms (enqueue 0.500098 ms)
|
| 270 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71019 ms - Host latency: 1.93741 ms (enqueue 0.496082 ms)
|
| 271 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71052 ms - Host latency: 1.93846 ms (enqueue 0.491418 ms)
|
| 272 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71047 ms - Host latency: 1.93795 ms (enqueue 0.490967 ms)
|
| 273 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71035 ms - Host latency: 1.93619 ms (enqueue 0.529456 ms)
|
| 274 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7104 ms - Host latency: 1.93649 ms (enqueue 0.532068 ms)
|
| 275 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71104 ms - Host latency: 1.93669 ms (enqueue 0.528735 ms)
|
| 276 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71011 ms - Host latency: 1.93809 ms (enqueue 0.502393 ms)
|
| 277 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71217 ms - Host latency: 1.93851 ms (enqueue 0.500793 ms)
|
| 278 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71025 ms - Host latency: 1.93676 ms (enqueue 0.500073 ms)
|
| 279 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71251 ms - Host latency: 1.93882 ms (enqueue 0.516321 ms)
|
| 280 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71044 ms - Host latency: 1.9354 ms (enqueue 0.537659 ms)
|
| 281 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71099 ms - Host latency: 1.93798 ms (enqueue 0.507983 ms)
|
| 282 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71084 ms - Host latency: 1.9364 ms (enqueue 0.50835 ms)
|
| 283 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71083 ms - Host latency: 1.93628 ms (enqueue 0.504089 ms)
|
| 284 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70999 ms - Host latency: 1.93597 ms (enqueue 0.499683 ms)
|
| 285 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71038 ms - Host latency: 1.93838 ms (enqueue 0.494019 ms)
|
| 286 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71086 ms - Host latency: 1.93784 ms (enqueue 0.51062 ms)
|
| 287 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7115 ms - Host latency: 1.93772 ms (enqueue 0.493457 ms)
|
| 288 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71006 ms - Host latency: 1.93782 ms (enqueue 0.483545 ms)
|
| 289 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71028 ms - Host latency: 1.93708 ms (enqueue 0.487329 ms)
|
| 290 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71084 ms - Host latency: 1.93865 ms (enqueue 0.489941 ms)
|
| 291 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71116 ms - Host latency: 1.93904 ms (enqueue 0.485889 ms)
|
| 292 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71084 ms - Host latency: 1.93857 ms (enqueue 0.486157 ms)
|
| 293 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71016 ms - Host latency: 1.93779 ms (enqueue 0.486475 ms)
|
| 294 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7103 ms - Host latency: 1.93708 ms (enqueue 0.48584 ms)
|
| 295 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71011 ms - Host latency: 1.93792 ms (enqueue 0.484448 ms)
|
| 296 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71001 ms - Host latency: 1.93623 ms (enqueue 0.488916 ms)
|
| 297 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71096 ms - Host latency: 1.93801 ms (enqueue 0.483936 ms)
|
| 298 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71084 ms - Host latency: 1.93806 ms (enqueue 0.488867 ms)
|
| 299 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71047 ms - Host latency: 1.93835 ms (enqueue 0.492798 ms)
|
| 300 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71118 ms - Host latency: 1.93921 ms (enqueue 0.486768 ms)
|
| 301 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71079 ms - Host latency: 1.93872 ms (enqueue 0.485181 ms)
|
| 302 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71138 ms - Host latency: 1.93867 ms (enqueue 0.483936 ms)
|
| 303 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71094 ms - Host latency: 1.93777 ms (enqueue 0.499438 ms)
|
| 304 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70984 ms - Host latency: 1.93628 ms (enqueue 0.50022 ms)
|
| 305 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71047 ms - Host latency: 1.9355 ms (enqueue 0.499927 ms)
|
| 306 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7116 ms - Host latency: 1.93843 ms (enqueue 0.500684 ms)
|
| 307 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71113 ms - Host latency: 1.93718 ms (enqueue 0.507959 ms)
|
| 308 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7113 ms - Host latency: 1.93931 ms (enqueue 0.488965 ms)
|
| 309 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71062 ms - Host latency: 1.93774 ms (enqueue 0.485059 ms)
|
| 310 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71091 ms - Host latency: 1.93845 ms (enqueue 0.490991 ms)
|
| 311 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71055 ms - Host latency: 1.9374 ms (enqueue 0.492236 ms)
|
| 312 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70974 ms - Host latency: 1.9374 ms (enqueue 0.486255 ms)
|
| 313 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71067 ms - Host latency: 1.93843 ms (enqueue 0.50271 ms)
|
| 314 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7106 ms - Host latency: 1.93862 ms (enqueue 0.49187 ms)
|
| 315 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71045 ms - Host latency: 1.9385 ms (enqueue 0.492554 ms)
|
| 316 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71018 ms - Host latency: 1.93811 ms (enqueue 0.496143 ms)
|
| 317 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71172 ms - Host latency: 1.93875 ms (enqueue 0.498096 ms)
|
| 318 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7106 ms - Host latency: 1.93706 ms (enqueue 0.504346 ms)
|
| 319 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71091 ms - Host latency: 1.9366 ms (enqueue 0.504492 ms)
|
| 320 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71038 ms - Host latency: 1.93704 ms (enqueue 0.495239 ms)
|
| 321 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71091 ms - Host latency: 1.9385 ms (enqueue 0.485205 ms)
|
| 322 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70989 ms - Host latency: 1.93721 ms (enqueue 0.489722 ms)
|
| 323 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70989 ms - Host latency: 1.93782 ms (enqueue 0.485229 ms)
|
| 324 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70991 ms - Host latency: 1.93748 ms (enqueue 0.490698 ms)
|
| 325 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71035 ms - Host latency: 1.9385 ms (enqueue 0.483789 ms)
|
| 326 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7114 ms - Host latency: 1.93926 ms (enqueue 0.487329 ms)
|
| 327 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7106 ms - Host latency: 1.93799 ms (enqueue 0.487085 ms)
|
| 328 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71052 ms - Host latency: 1.9377 ms (enqueue 0.490405 ms)
|
| 329 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70972 ms - Host latency: 1.93721 ms (enqueue 0.48623 ms)
|
| 330 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71067 ms - Host latency: 1.93787 ms (enqueue 0.498022 ms)
|
| 331 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71011 ms - Host latency: 1.93757 ms (enqueue 0.487817 ms)
|
| 332 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71047 ms - Host latency: 1.9373 ms (enqueue 0.514014 ms)
|
| 333 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71086 ms - Host latency: 1.93804 ms (enqueue 0.496875 ms)
|
| 334 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71045 ms - Host latency: 1.93767 ms (enqueue 0.50332 ms)
|
| 335 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71028 ms - Host latency: 1.9375 ms (enqueue 0.494556 ms)
|
| 336 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71018 ms - Host latency: 1.93831 ms (enqueue 0.491724 ms)
|
| 337 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71091 ms - Host latency: 1.93784 ms (enqueue 0.505933 ms)
|
| 338 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71018 ms - Host latency: 1.93669 ms (enqueue 0.577441 ms)
|
| 339 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71033 ms - Host latency: 1.93728 ms (enqueue 0.55769 ms)
|
| 340 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71062 ms - Host latency: 1.93906 ms (enqueue 0.520435 ms)
|
| 341 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71094 ms - Host latency: 1.93718 ms (enqueue 0.496729 ms)
|
| 342 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71089 ms - Host latency: 1.93809 ms (enqueue 0.491113 ms)
|
| 343 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71118 ms - Host latency: 1.93914 ms (enqueue 0.488135 ms)
|
| 344 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71047 ms - Host latency: 1.9385 ms (enqueue 0.494971 ms)
|
| 345 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.70981 ms - Host latency: 1.93738 ms (enqueue 0.502368 ms)
|
| 346 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7104 ms - Host latency: 1.93691 ms (enqueue 0.648169 ms)
|
| 347 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71089 ms - Host latency: 1.9333 ms (enqueue 0.93479 ms)
|
| 348 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7103 ms - Host latency: 1.93718 ms (enqueue 0.50459 ms)
|
| 349 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.71016 ms - Host latency: 1.93704 ms (enqueue 0.567969 ms)
|
| 350 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7103 ms - Host latency: 1.93767 ms (enqueue 0.62102 ms)
|
| 351 |
+
[01/20/2026-07:00:14] [I] Average on 10 runs - GPU latency: 1.7105 ms - Host latency: 1.93516 ms (enqueue 0.798364 ms)
|
| 352 |
+
[01/20/2026-07:00:14] [I]
|
| 353 |
+
[01/20/2026-07:00:14] [I] === Performance summary ===
|
| 354 |
+
[01/20/2026-07:00:14] [I] Throughput: 583.684 qps
|
| 355 |
+
[01/20/2026-07:00:14] [I] Latency: min = 1.92601 ms, max = 1.94611 ms, mean = 1.93759 ms, median = 1.93799 ms, percentile(90%) = 1.94092 ms, percentile(95%) = 1.94177 ms, percentile(99%) = 1.9436 ms
|
| 356 |
+
[01/20/2026-07:00:14] [I] Enqueue Time: min = 0.479126 ms, max = 1.23328 ms, mean = 0.526058 ms, median = 0.491699 ms, percentile(90%) = 0.543335 ms, percentile(95%) = 0.592285 ms, percentile(99%) = 1.12659 ms
|
| 357 |
+
[01/20/2026-07:00:14] [I] H2D Latency: min = 0.214355 ms, max = 0.231201 ms, mean = 0.222107 ms, median = 0.2229 ms, percentile(90%) = 0.223877 ms, percentile(95%) = 0.224121 ms, percentile(99%) = 0.225098 ms
|
| 358 |
+
[01/20/2026-07:00:14] [I] GPU Compute Time: min = 1.70532 ms, max = 1.71667 ms, mean = 1.71063 ms, median = 1.71039 ms, percentile(90%) = 1.71313 ms, percentile(95%) = 1.7135 ms, percentile(99%) = 1.71521 ms
|
| 359 |
+
[01/20/2026-07:00:14] [I] D2H Latency: min = 0.00415039 ms, max = 0.00675964 ms, mean = 0.00485429 ms, median = 0.00463867 ms, percentile(90%) = 0.00598145 ms, percentile(95%) = 0.00628662 ms, percentile(99%) = 0.00646973 ms
|
| 360 |
+
[01/20/2026-07:00:14] [I] Total Host Walltime: 3.00676 s
|
| 361 |
+
[01/20/2026-07:00:14] [I] Total GPU Compute Time: 3.00216 s
|
| 362 |
+
[01/20/2026-07:00:14] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 363 |
+
[01/20/2026-07:00:14] [I]
|
| 364 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_m_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_m_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_dinov3_m_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:779597175f01f1a4caa6fd15134f15b0579675c2d98244caecbb977ca0f93421
|
| 3 |
+
size 73789261
|
deimv2_dinov3_m_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ef951bfc71b177df65009d97506b3ed50a3b3b306729cc4584e37dc222c6706
|
| 3 |
+
size 73924409
|
deimv2_dinov3_s_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a6ef345c31a70bb2dd7562a4f27d5406ada729a27eb18c830ba193f2259df70
|
| 3 |
+
size 26508204
|
deimv2_dinov3_s_coco.log
ADDED
|
@@ -0,0 +1,408 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_s_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_s_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-06:55:08] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-06:55:08] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-06:55:08] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-06:55:08] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-06:55:08] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-06:55:08] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-06:55:08] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-06:55:08] [I] Model: checkpoints/deimv2_dinov3_s_coco.onnx
|
| 10 |
+
[01/20/2026-06:55:08] [I] Output:
|
| 11 |
+
[01/20/2026-06:55:08] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-06:55:08] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-06:55:08] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-06:55:08] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-06:55:08] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-06:55:08] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-06:55:08] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-06:55:08] [I] Calibration:
|
| 19 |
+
[01/20/2026-06:55:08] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-06:55:08] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-06:55:08] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-06:55:08] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-06:55:08] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-06:55:08] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-06:55:08] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-06:55:08] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-06:55:08] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-06:55:08] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-06:55:08] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-06:55:08] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-06:55:08] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-06:55:08] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-06:55:08] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-06:55:08] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-06:55:08] [I] Save engine: checkpoints/deimv2_dinov3_s_coco.engine
|
| 36 |
+
[01/20/2026-06:55:08] [I] Load engine:
|
| 37 |
+
[01/20/2026-06:55:08] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-06:55:08] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-06:55:08] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-06:55:08] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-06:55:08] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-06:55:08] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-06:55:08] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-06:55:08] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-06:55:08] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-06:55:08] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-06:55:08] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-06:55:08] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-06:55:08] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-06:55:08] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-06:55:08] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-06:55:08] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-06:55:08] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-06:55:08] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-06:55:08] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-06:55:08] [I] Input build shape (profile 0): images=1x3x640x640+1x3x640x640+1x3x640x640
|
| 57 |
+
[01/20/2026-06:55:08] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-06:55:08] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-06:55:08] [I] === System Options ===
|
| 60 |
+
[01/20/2026-06:55:08] [I] Device: 0
|
| 61 |
+
[01/20/2026-06:55:08] [I] DLACore:
|
| 62 |
+
[01/20/2026-06:55:08] [I] Plugins:
|
| 63 |
+
[01/20/2026-06:55:08] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-06:55:08] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-06:55:08] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-06:55:08] [I]
|
| 67 |
+
[01/20/2026-06:55:08] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-06:55:08] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-06:55:08] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-06:55:08] [I] Input inference shape : images=1x3x640x640
|
| 71 |
+
[01/20/2026-06:55:08] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-06:55:08] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-06:55:08] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-06:55:08] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-06:55:08] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-06:55:08] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-06:55:08] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-06:55:08] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-06:55:08] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-06:55:08] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-06:55:08] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-06:55:08] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-06:55:08] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-06:55:08] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-06:55:08] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-06:55:08] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-06:55:08] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-06:55:08] [I] Inputs:
|
| 89 |
+
[01/20/2026-06:55:08] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-06:55:08] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-06:55:08] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-06:55:08] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-06:55:08] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-06:55:08] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-06:55:08] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-06:55:08] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-06:55:08] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-06:55:08] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-06:55:08] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-06:55:08] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-06:55:08] [I]
|
| 102 |
+
[01/20/2026-06:55:08] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-06:55:08] [I] Available Devices:
|
| 104 |
+
[01/20/2026-06:55:08] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-06:55:08] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-06:55:08] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-06:55:08] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-06:55:08] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-06:55:08] [I] SMs: 128
|
| 110 |
+
[01/20/2026-06:55:08] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-06:55:08] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-06:55:08] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-06:55:08] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-06:55:08] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-06:55:08] [I]
|
| 116 |
+
[01/20/2026-06:55:08] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-06:55:08] [I]
|
| 118 |
+
[01/20/2026-06:55:08] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-06:55:08] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-06:55:08] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 10549 (MiB)
|
| 121 |
+
[01/20/2026-06:55:08] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-06:55:08] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-06:55:08] [I] [TRT] Input filename: checkpoints/deimv2_dinov3_s_coco.onnx
|
| 124 |
+
[01/20/2026-06:55:08] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-06:55:08] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-06:55:08] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-06:55:08] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-06:55:08] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-06:55:08] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-06:55:08] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-06:55:08] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-06:55:08] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-06:55:09] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-06:55:09] [I] Finished parsing network model. Parse time: 0.0945442
|
| 135 |
+
[01/20/2026-06:55:09] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x640x640 OPT=1x3x640x640 MAX=1x3x640x640
|
| 136 |
+
[01/20/2026-06:55:09] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-06:55:09] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +204, GPU +4, now: CPU 571, GPU 10553 (MiB)
|
| 138 |
+
[01/20/2026-06:55:09] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-06:55:09] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-06:55:09] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-06:55:52] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-06:57:39] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-06:57:39] [I] [TRT] Total Host Persistent Memory: 281504 bytes
|
| 144 |
+
[01/20/2026-06:57:39] [I] [TRT] Total Device Persistent Memory: 3072 bytes
|
| 145 |
+
[01/20/2026-06:57:39] [I] [TRT] Max Scratch Memory: 9665024 bytes
|
| 146 |
+
[01/20/2026-06:57:39] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 91 steps to complete.
|
| 147 |
+
[01/20/2026-06:57:39] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 2.89015ms to assign 11 blocks to 91 nodes requiring 21496320 bytes.
|
| 148 |
+
[01/20/2026-06:57:39] [I] [TRT] Total Activation Memory: 21496320 bytes
|
| 149 |
+
[01/20/2026-06:57:39] [I] [TRT] Total Weights Memory: 19740416 bytes
|
| 150 |
+
[01/20/2026-06:57:40] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-06:57:40] [I] [TRT] Engine generation completed in 150.685 seconds.
|
| 152 |
+
[01/20/2026-06:57:40] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 11 MiB, GPU 93 MiB
|
| 153 |
+
[01/20/2026-06:57:40] [I] Created engine with size: 25.2802 MiB
|
| 154 |
+
[01/20/2026-06:57:40] [I] Engine built in 151.034 sec.
|
| 155 |
+
[01/20/2026-06:57:40] [I] [TRT] Loaded engine size: 25 MiB
|
| 156 |
+
[01/20/2026-06:57:40] [I] Engine deserialized in 0.0153845 sec.
|
| 157 |
+
[01/20/2026-06:57:40] [I] [TRT] [MS] Running engine with multi stream info
|
| 158 |
+
[01/20/2026-06:57:40] [I] [TRT] [MS] Number of aux streams is 2
|
| 159 |
+
[01/20/2026-06:57:40] [I] [TRT] [MS] Number of total worker streams is 3
|
| 160 |
+
[01/20/2026-06:57:40] [I] [TRT] [MS] The main stream provided by execute/enqueue calls is the first worker stream
|
| 161 |
+
[01/20/2026-06:57:40] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +21, now: CPU 0, GPU 39 (MiB)
|
| 162 |
+
[01/20/2026-06:57:40] [I] Setting persistentCacheLimit to 0 bytes.
|
| 163 |
+
[01/20/2026-06:57:40] [I] Created execution context with device memory size: 20.5005 MiB
|
| 164 |
+
[01/20/2026-06:57:40] [I] Using random values for input images
|
| 165 |
+
[01/20/2026-06:57:40] [I] Input binding for images with dimensions 1x3x640x640 is created.
|
| 166 |
+
[01/20/2026-06:57:40] [I] Using random values for input orig_target_sizes
|
| 167 |
+
[01/20/2026-06:57:40] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 168 |
+
[01/20/2026-06:57:40] [I] Output binding for labels with dimensions 1x300 is created.
|
| 169 |
+
[01/20/2026-06:57:40] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 170 |
+
[01/20/2026-06:57:40] [I] Output binding for scores with dimensions 1x300 is created.
|
| 171 |
+
[01/20/2026-06:57:40] [I] Starting inference
|
| 172 |
+
[01/20/2026-06:57:43] [I] Warmup completed 146 queries over 200 ms
|
| 173 |
+
[01/20/2026-06:57:43] [I] Timing trace has 2199 queries over 3.00392 s
|
| 174 |
+
[01/20/2026-06:57:43] [I]
|
| 175 |
+
[01/20/2026-06:57:43] [I] === Trace details ===
|
| 176 |
+
[01/20/2026-06:57:43] [I] Trace averages of 10 runs:
|
| 177 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.35834 ms - Host latency: 1.58537 ms (enqueue 0.439809 ms)
|
| 178 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36411 ms - Host latency: 1.59102 ms (enqueue 0.443648 ms)
|
| 179 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36554 ms - Host latency: 1.59115 ms (enqueue 0.441405 ms)
|
| 180 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36572 ms - Host latency: 1.59173 ms (enqueue 0.450684 ms)
|
| 181 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36576 ms - Host latency: 1.59286 ms (enqueue 0.442368 ms)
|
| 182 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36451 ms - Host latency: 1.58991 ms (enqueue 0.441461 ms)
|
| 183 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36351 ms - Host latency: 1.58781 ms (enqueue 0.449054 ms)
|
| 184 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36335 ms - Host latency: 1.58888 ms (enqueue 0.447015 ms)
|
| 185 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3633 ms - Host latency: 1.59006 ms (enqueue 0.444852 ms)
|
| 186 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3643 ms - Host latency: 1.59003 ms (enqueue 0.444052 ms)
|
| 187 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36356 ms - Host latency: 1.58947 ms (enqueue 0.443555 ms)
|
| 188 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36461 ms - Host latency: 1.59058 ms (enqueue 0.444547 ms)
|
| 189 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36305 ms - Host latency: 1.58903 ms (enqueue 0.443799 ms)
|
| 190 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36307 ms - Host latency: 1.58965 ms (enqueue 0.44136 ms)
|
| 191 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36357 ms - Host latency: 1.58973 ms (enqueue 0.445804 ms)
|
| 192 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36357 ms - Host latency: 1.58859 ms (enqueue 0.439398 ms)
|
| 193 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36337 ms - Host latency: 1.58787 ms (enqueue 0.458829 ms)
|
| 194 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36386 ms - Host latency: 1.58777 ms (enqueue 0.470523 ms)
|
| 195 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36315 ms - Host latency: 1.58919 ms (enqueue 0.432932 ms)
|
| 196 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36291 ms - Host latency: 1.58851 ms (enqueue 0.44093 ms)
|
| 197 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3643 ms - Host latency: 1.59142 ms (enqueue 0.436078 ms)
|
| 198 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36438 ms - Host latency: 1.59136 ms (enqueue 0.441199 ms)
|
| 199 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36407 ms - Host latency: 1.58911 ms (enqueue 0.437918 ms)
|
| 200 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36408 ms - Host latency: 1.59089 ms (enqueue 0.447729 ms)
|
| 201 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.59078 ms (enqueue 0.435645 ms)
|
| 202 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36422 ms - Host latency: 1.59025 ms (enqueue 0.434961 ms)
|
| 203 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3635 ms - Host latency: 1.591 ms (enqueue 0.440906 ms)
|
| 204 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36263 ms - Host latency: 1.58801 ms (enqueue 0.434052 ms)
|
| 205 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36351 ms - Host latency: 1.58947 ms (enqueue 0.462219 ms)
|
| 206 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36393 ms - Host latency: 1.59056 ms (enqueue 0.443243 ms)
|
| 207 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36391 ms - Host latency: 1.59028 ms (enqueue 0.438641 ms)
|
| 208 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36307 ms - Host latency: 1.58887 ms (enqueue 0.445636 ms)
|
| 209 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36342 ms - Host latency: 1.59011 ms (enqueue 0.444208 ms)
|
| 210 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36346 ms - Host latency: 1.59023 ms (enqueue 0.444598 ms)
|
| 211 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36316 ms - Host latency: 1.58997 ms (enqueue 0.445331 ms)
|
| 212 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36346 ms - Host latency: 1.59006 ms (enqueue 0.439972 ms)
|
| 213 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36443 ms - Host latency: 1.58953 ms (enqueue 0.478406 ms)
|
| 214 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36407 ms - Host latency: 1.58942 ms (enqueue 0.472168 ms)
|
| 215 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.59003 ms (enqueue 0.450946 ms)
|
| 216 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36269 ms - Host latency: 1.58821 ms (enqueue 0.452094 ms)
|
| 217 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3634 ms - Host latency: 1.58981 ms (enqueue 0.436835 ms)
|
| 218 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36327 ms - Host latency: 1.58932 ms (enqueue 0.447278 ms)
|
| 219 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36411 ms - Host latency: 1.58897 ms (enqueue 0.771808 ms)
|
| 220 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36356 ms - Host latency: 1.58953 ms (enqueue 0.460181 ms)
|
| 221 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36378 ms - Host latency: 1.59033 ms (enqueue 0.450714 ms)
|
| 222 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36313 ms - Host latency: 1.58844 ms (enqueue 0.436713 ms)
|
| 223 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36343 ms - Host latency: 1.59 ms (enqueue 0.440601 ms)
|
| 224 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36338 ms - Host latency: 1.58936 ms (enqueue 0.438214 ms)
|
| 225 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36384 ms - Host latency: 1.58855 ms (enqueue 0.452051 ms)
|
| 226 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36315 ms - Host latency: 1.58967 ms (enqueue 0.441931 ms)
|
| 227 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36335 ms - Host latency: 1.58993 ms (enqueue 0.439587 ms)
|
| 228 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.364 ms - Host latency: 1.59079 ms (enqueue 0.441016 ms)
|
| 229 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3636 ms - Host latency: 1.59019 ms (enqueue 0.434497 ms)
|
| 230 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36348 ms - Host latency: 1.59001 ms (enqueue 0.440436 ms)
|
| 231 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36329 ms - Host latency: 1.58978 ms (enqueue 0.456458 ms)
|
| 232 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.363 ms - Host latency: 1.58828 ms (enqueue 0.451471 ms)
|
| 233 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36321 ms - Host latency: 1.58891 ms (enqueue 0.444556 ms)
|
| 234 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36294 ms - Host latency: 1.58796 ms (enqueue 0.443604 ms)
|
| 235 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36411 ms - Host latency: 1.59097 ms (enqueue 0.445068 ms)
|
| 236 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3638 ms - Host latency: 1.59024 ms (enqueue 0.443127 ms)
|
| 237 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36346 ms - Host latency: 1.58997 ms (enqueue 0.440167 ms)
|
| 238 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36389 ms - Host latency: 1.5899 ms (enqueue 0.445129 ms)
|
| 239 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.59065 ms (enqueue 0.446313 ms)
|
| 240 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36328 ms - Host latency: 1.58978 ms (enqueue 0.444324 ms)
|
| 241 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36334 ms - Host latency: 1.59003 ms (enqueue 0.439246 ms)
|
| 242 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36407 ms - Host latency: 1.59083 ms (enqueue 0.438879 ms)
|
| 243 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36386 ms - Host latency: 1.59032 ms (enqueue 0.441052 ms)
|
| 244 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36317 ms - Host latency: 1.58983 ms (enqueue 0.438794 ms)
|
| 245 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36306 ms - Host latency: 1.58959 ms (enqueue 0.438867 ms)
|
| 246 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36301 ms - Host latency: 1.58944 ms (enqueue 0.438855 ms)
|
| 247 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36361 ms - Host latency: 1.59039 ms (enqueue 0.441309 ms)
|
| 248 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36296 ms - Host latency: 1.58978 ms (enqueue 0.44148 ms)
|
| 249 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3641 ms - Host latency: 1.59025 ms (enqueue 0.442981 ms)
|
| 250 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36499 ms - Host latency: 1.59066 ms (enqueue 0.446216 ms)
|
| 251 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36328 ms - Host latency: 1.58986 ms (enqueue 0.44093 ms)
|
| 252 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3645 ms - Host latency: 1.59019 ms (enqueue 0.435937 ms)
|
| 253 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36423 ms - Host latency: 1.59133 ms (enqueue 0.439709 ms)
|
| 254 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36431 ms - Host latency: 1.58954 ms (enqueue 0.442932 ms)
|
| 255 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36331 ms - Host latency: 1.58868 ms (enqueue 0.445911 ms)
|
| 256 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36373 ms - Host latency: 1.58958 ms (enqueue 0.438513 ms)
|
| 257 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3636 ms - Host latency: 1.5901 ms (enqueue 0.435034 ms)
|
| 258 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36407 ms - Host latency: 1.59045 ms (enqueue 0.4354 ms)
|
| 259 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36396 ms - Host latency: 1.59104 ms (enqueue 0.460461 ms)
|
| 260 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36338 ms - Host latency: 1.58949 ms (enqueue 0.454236 ms)
|
| 261 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36307 ms - Host latency: 1.58958 ms (enqueue 0.442126 ms)
|
| 262 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36404 ms - Host latency: 1.58927 ms (enqueue 0.439563 ms)
|
| 263 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36359 ms - Host latency: 1.59045 ms (enqueue 0.442273 ms)
|
| 264 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36388 ms - Host latency: 1.59047 ms (enqueue 0.442029 ms)
|
| 265 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36395 ms - Host latency: 1.58977 ms (enqueue 0.440356 ms)
|
| 266 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36344 ms - Host latency: 1.58864 ms (enqueue 0.445386 ms)
|
| 267 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36396 ms - Host latency: 1.58995 ms (enqueue 0.444177 ms)
|
| 268 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36354 ms - Host latency: 1.58967 ms (enqueue 0.442737 ms)
|
| 269 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36423 ms - Host latency: 1.58945 ms (enqueue 0.440112 ms)
|
| 270 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3634 ms - Host latency: 1.59 ms (enqueue 0.437964 ms)
|
| 271 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36364 ms - Host latency: 1.59041 ms (enqueue 0.438586 ms)
|
| 272 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36342 ms - Host latency: 1.58943 ms (enqueue 0.441638 ms)
|
| 273 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36395 ms - Host latency: 1.59001 ms (enqueue 0.438611 ms)
|
| 274 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36306 ms - Host latency: 1.58876 ms (enqueue 0.437866 ms)
|
| 275 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36351 ms - Host latency: 1.58987 ms (enqueue 0.441199 ms)
|
| 276 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36368 ms - Host latency: 1.59054 ms (enqueue 0.443579 ms)
|
| 277 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36366 ms - Host latency: 1.58575 ms (enqueue 0.514673 ms)
|
| 278 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36473 ms - Host latency: 1.58927 ms (enqueue 0.471899 ms)
|
| 279 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36323 ms - Host latency: 1.58971 ms (enqueue 0.443347 ms)
|
| 280 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36383 ms - Host latency: 1.59017 ms (enqueue 0.43667 ms)
|
| 281 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.59031 ms (enqueue 0.436035 ms)
|
| 282 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36333 ms - Host latency: 1.58923 ms (enqueue 0.445215 ms)
|
| 283 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36353 ms - Host latency: 1.58886 ms (enqueue 0.43667 ms)
|
| 284 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36255 ms - Host latency: 1.58766 ms (enqueue 0.435669 ms)
|
| 285 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36307 ms - Host latency: 1.58983 ms (enqueue 0.440649 ms)
|
| 286 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36399 ms - Host latency: 1.58915 ms (enqueue 0.43988 ms)
|
| 287 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36412 ms - Host latency: 1.5907 ms (enqueue 0.446997 ms)
|
| 288 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3639 ms - Host latency: 1.59098 ms (enqueue 0.44856 ms)
|
| 289 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36451 ms - Host latency: 1.59078 ms (enqueue 0.437244 ms)
|
| 290 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36326 ms - Host latency: 1.58932 ms (enqueue 0.445728 ms)
|
| 291 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36398 ms - Host latency: 1.59054 ms (enqueue 0.439539 ms)
|
| 292 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3637 ms - Host latency: 1.59021 ms (enqueue 0.442529 ms)
|
| 293 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36335 ms - Host latency: 1.58967 ms (enqueue 0.438489 ms)
|
| 294 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36267 ms - Host latency: 1.58927 ms (enqueue 0.439697 ms)
|
| 295 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36331 ms - Host latency: 1.58962 ms (enqueue 0.440845 ms)
|
| 296 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36307 ms - Host latency: 1.58955 ms (enqueue 0.440918 ms)
|
| 297 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36354 ms - Host latency: 1.59021 ms (enqueue 0.434119 ms)
|
| 298 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36375 ms - Host latency: 1.58995 ms (enqueue 0.448096 ms)
|
| 299 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36323 ms - Host latency: 1.59006 ms (enqueue 0.442773 ms)
|
| 300 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3635 ms - Host latency: 1.58994 ms (enqueue 0.443115 ms)
|
| 301 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36307 ms - Host latency: 1.58976 ms (enqueue 0.442371 ms)
|
| 302 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36318 ms - Host latency: 1.58975 ms (enqueue 0.439624 ms)
|
| 303 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36328 ms - Host latency: 1.59004 ms (enqueue 0.44209 ms)
|
| 304 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36339 ms - Host latency: 1.58943 ms (enqueue 0.447375 ms)
|
| 305 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36345 ms - Host latency: 1.5887 ms (enqueue 0.446582 ms)
|
| 306 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3639 ms - Host latency: 1.59076 ms (enqueue 0.447876 ms)
|
| 307 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36449 ms - Host latency: 1.59119 ms (enqueue 0.446411 ms)
|
| 308 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36377 ms - Host latency: 1.59092 ms (enqueue 0.443127 ms)
|
| 309 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36384 ms - Host latency: 1.58966 ms (enqueue 0.437134 ms)
|
| 310 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36396 ms - Host latency: 1.59044 ms (enqueue 0.439563 ms)
|
| 311 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36298 ms - Host latency: 1.58964 ms (enqueue 0.436792 ms)
|
| 312 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36285 ms - Host latency: 1.58849 ms (enqueue 0.454407 ms)
|
| 313 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3644 ms - Host latency: 1.59087 ms (enqueue 0.446143 ms)
|
| 314 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36292 ms - Host latency: 1.58948 ms (enqueue 0.439722 ms)
|
| 315 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.59043 ms (enqueue 0.439331 ms)
|
| 316 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36282 ms - Host latency: 1.58804 ms (enqueue 0.463892 ms)
|
| 317 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36533 ms - Host latency: 1.59038 ms (enqueue 0.445728 ms)
|
| 318 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.58921 ms (enqueue 0.451514 ms)
|
| 319 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36377 ms - Host latency: 1.59084 ms (enqueue 0.444482 ms)
|
| 320 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36379 ms - Host latency: 1.58928 ms (enqueue 0.471216 ms)
|
| 321 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36372 ms - Host latency: 1.58584 ms (enqueue 1.03264 ms)
|
| 322 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36384 ms - Host latency: 1.59001 ms (enqueue 0.439404 ms)
|
| 323 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.58979 ms (enqueue 0.432397 ms)
|
| 324 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36406 ms - Host latency: 1.59026 ms (enqueue 0.438867 ms)
|
| 325 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36399 ms - Host latency: 1.59099 ms (enqueue 0.441235 ms)
|
| 326 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36431 ms - Host latency: 1.58938 ms (enqueue 0.439136 ms)
|
| 327 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3637 ms - Host latency: 1.58992 ms (enqueue 0.435034 ms)
|
| 328 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36431 ms - Host latency: 1.59045 ms (enqueue 0.446167 ms)
|
| 329 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36372 ms - Host latency: 1.5905 ms (enqueue 0.439868 ms)
|
| 330 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36387 ms - Host latency: 1.59048 ms (enqueue 0.437817 ms)
|
| 331 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36426 ms - Host latency: 1.59072 ms (enqueue 0.437793 ms)
|
| 332 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36399 ms - Host latency: 1.5906 ms (enqueue 0.436206 ms)
|
| 333 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36384 ms - Host latency: 1.5897 ms (enqueue 0.457056 ms)
|
| 334 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36401 ms - Host latency: 1.58594 ms (enqueue 0.492017 ms)
|
| 335 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36375 ms - Host latency: 1.58823 ms (enqueue 0.459448 ms)
|
| 336 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36404 ms - Host latency: 1.5905 ms (enqueue 0.438794 ms)
|
| 337 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36399 ms - Host latency: 1.59077 ms (enqueue 0.441895 ms)
|
| 338 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36423 ms - Host latency: 1.59053 ms (enqueue 0.44104 ms)
|
| 339 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36379 ms - Host latency: 1.59041 ms (enqueue 0.439673 ms)
|
| 340 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36335 ms - Host latency: 1.58899 ms (enqueue 0.459937 ms)
|
| 341 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36431 ms - Host latency: 1.5896 ms (enqueue 0.444531 ms)
|
| 342 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36477 ms - Host latency: 1.59155 ms (enqueue 0.439697 ms)
|
| 343 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3637 ms - Host latency: 1.58909 ms (enqueue 0.438525 ms)
|
| 344 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3637 ms - Host latency: 1.58887 ms (enqueue 0.438477 ms)
|
| 345 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36343 ms - Host latency: 1.58784 ms (enqueue 0.444067 ms)
|
| 346 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36494 ms - Host latency: 1.59221 ms (enqueue 0.439429 ms)
|
| 347 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36348 ms - Host latency: 1.58997 ms (enqueue 0.446143 ms)
|
| 348 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36438 ms - Host latency: 1.59109 ms (enqueue 0.44978 ms)
|
| 349 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36401 ms - Host latency: 1.59106 ms (enqueue 0.446118 ms)
|
| 350 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36528 ms - Host latency: 1.59194 ms (enqueue 0.448413 ms)
|
| 351 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36492 ms - Host latency: 1.59028 ms (enqueue 0.45022 ms)
|
| 352 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36418 ms - Host latency: 1.59033 ms (enqueue 0.449512 ms)
|
| 353 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36418 ms - Host latency: 1.58845 ms (enqueue 0.497168 ms)
|
| 354 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36526 ms - Host latency: 1.59045 ms (enqueue 0.459277 ms)
|
| 355 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36362 ms - Host latency: 1.58826 ms (enqueue 0.460718 ms)
|
| 356 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3645 ms - Host latency: 1.59138 ms (enqueue 0.449902 ms)
|
| 357 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36389 ms - Host latency: 1.58931 ms (enqueue 0.452368 ms)
|
| 358 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3636 ms - Host latency: 1.58979 ms (enqueue 0.448291 ms)
|
| 359 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36406 ms - Host latency: 1.59009 ms (enqueue 0.449634 ms)
|
| 360 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3637 ms - Host latency: 1.58862 ms (enqueue 0.46543 ms)
|
| 361 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36409 ms - Host latency: 1.59097 ms (enqueue 0.450562 ms)
|
| 362 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36399 ms - Host latency: 1.5906 ms (enqueue 0.452661 ms)
|
| 363 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36404 ms - Host latency: 1.59038 ms (enqueue 0.44873 ms)
|
| 364 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36404 ms - Host latency: 1.59102 ms (enqueue 0.445532 ms)
|
| 365 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36331 ms - Host latency: 1.59001 ms (enqueue 0.447095 ms)
|
| 366 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3644 ms - Host latency: 1.59131 ms (enqueue 0.448535 ms)
|
| 367 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3634 ms - Host latency: 1.58972 ms (enqueue 0.448926 ms)
|
| 368 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36367 ms - Host latency: 1.58906 ms (enqueue 0.449585 ms)
|
| 369 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36404 ms - Host latency: 1.59087 ms (enqueue 0.441382 ms)
|
| 370 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36362 ms - Host latency: 1.59041 ms (enqueue 0.437012 ms)
|
| 371 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36333 ms - Host latency: 1.59031 ms (enqueue 0.441211 ms)
|
| 372 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36487 ms - Host latency: 1.59082 ms (enqueue 0.438501 ms)
|
| 373 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36321 ms - Host latency: 1.59004 ms (enqueue 0.435645 ms)
|
| 374 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36292 ms - Host latency: 1.58921 ms (enqueue 0.438892 ms)
|
| 375 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36433 ms - Host latency: 1.59072 ms (enqueue 0.434082 ms)
|
| 376 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36384 ms - Host latency: 1.58962 ms (enqueue 0.438867 ms)
|
| 377 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36387 ms - Host latency: 1.59011 ms (enqueue 0.4448 ms)
|
| 378 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36426 ms - Host latency: 1.59116 ms (enqueue 0.438916 ms)
|
| 379 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36318 ms - Host latency: 1.58914 ms (enqueue 0.440454 ms)
|
| 380 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36372 ms - Host latency: 1.59043 ms (enqueue 0.43689 ms)
|
| 381 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36462 ms - Host latency: 1.59084 ms (enqueue 0.449536 ms)
|
| 382 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36365 ms - Host latency: 1.5905 ms (enqueue 0.443726 ms)
|
| 383 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36409 ms - Host latency: 1.59077 ms (enqueue 0.439819 ms)
|
| 384 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3635 ms - Host latency: 1.58977 ms (enqueue 0.448389 ms)
|
| 385 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3634 ms - Host latency: 1.59011 ms (enqueue 0.440186 ms)
|
| 386 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36335 ms - Host latency: 1.58994 ms (enqueue 0.436816 ms)
|
| 387 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36379 ms - Host latency: 1.59033 ms (enqueue 0.43562 ms)
|
| 388 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36387 ms - Host latency: 1.59082 ms (enqueue 0.438452 ms)
|
| 389 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3634 ms - Host latency: 1.58987 ms (enqueue 0.439624 ms)
|
| 390 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3637 ms - Host latency: 1.59036 ms (enqueue 0.44209 ms)
|
| 391 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36389 ms - Host latency: 1.58984 ms (enqueue 0.443994 ms)
|
| 392 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36423 ms - Host latency: 1.59028 ms (enqueue 0.436646 ms)
|
| 393 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.3646 ms - Host latency: 1.59121 ms (enqueue 0.435034 ms)
|
| 394 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36331 ms - Host latency: 1.58965 ms (enqueue 0.443726 ms)
|
| 395 |
+
[01/20/2026-06:57:43] [I] Average on 10 runs - GPU latency: 1.36426 ms - Host latency: 1.58589 ms (enqueue 0.503564 ms)
|
| 396 |
+
[01/20/2026-06:57:43] [I]
|
| 397 |
+
[01/20/2026-06:57:43] [I] === Performance summary ===
|
| 398 |
+
[01/20/2026-06:57:43] [I] Throughput: 732.043 qps
|
| 399 |
+
[01/20/2026-06:57:43] [I] Latency: min = 1.57953 ms, max = 1.59692 ms, mean = 1.58984 ms, median = 1.59009 ms, percentile(90%) = 1.59253 ms, percentile(95%) = 1.59326 ms, percentile(99%) = 1.59448 ms
|
| 400 |
+
[01/20/2026-06:57:43] [I] Enqueue Time: min = 0.426849 ms, max = 1.68213 ms, mean = 0.449191 ms, median = 0.439941 ms, percentile(90%) = 0.460449 ms, percentile(95%) = 0.486328 ms, percentile(99%) = 0.567993 ms
|
| 401 |
+
[01/20/2026-06:57:43] [I] H2D Latency: min = 0.213867 ms, max = 0.227295 ms, mean = 0.22142 ms, median = 0.221924 ms, percentile(90%) = 0.222534 ms, percentile(95%) = 0.222717 ms, percentile(99%) = 0.223145 ms
|
| 402 |
+
[01/20/2026-06:57:43] [I] GPU Compute Time: min = 1.3568 ms, max = 1.36914 ms, mean = 1.36374 ms, median = 1.36389 ms, percentile(90%) = 1.36597 ms, percentile(95%) = 1.36621 ms, percentile(99%) = 1.36792 ms
|
| 403 |
+
[01/20/2026-06:57:43] [I] D2H Latency: min = 0.00415039 ms, max = 0.00634766 ms, mean = 0.00469234 ms, median = 0.0045166 ms, percentile(90%) = 0.00561523 ms, percentile(95%) = 0.00585938 ms, percentile(99%) = 0.00610352 ms
|
| 404 |
+
[01/20/2026-06:57:43] [I] Total Host Walltime: 3.00392 s
|
| 405 |
+
[01/20/2026-06:57:43] [I] Total GPU Compute Time: 2.99885 s
|
| 406 |
+
[01/20/2026-06:57:43] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 407 |
+
[01/20/2026-06:57:43] [I]
|
| 408 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_s_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_s_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_dinov3_s_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32200aab13bc4edbb08b5e55156479f97696facb16f29560ab00cbded8a69faf
|
| 3 |
+
size 40102933
|
deimv2_dinov3_s_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9491ab33b68ecfc0e34043abb3009599ab1e892fb953a1faad12ef4fca5a35c4
|
| 3 |
+
size 39537357
|
deimv2_dinov3_x_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f4acd1a16a31c62b56a551c06eafdff933a67ae77b50c8c4f04c5f2148d0c52
|
| 3 |
+
size 105439276
|
deimv2_dinov3_x_coco.log
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_x_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_x_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-07:02:51] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-07:02:51] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-07:02:51] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-07:02:51] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-07:02:51] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-07:02:51] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-07:02:51] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-07:02:51] [I] Model: checkpoints/deimv2_dinov3_x_coco.onnx
|
| 10 |
+
[01/20/2026-07:02:51] [I] Output:
|
| 11 |
+
[01/20/2026-07:02:51] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-07:02:51] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-07:02:51] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-07:02:51] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-07:02:51] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-07:02:51] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-07:02:51] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-07:02:51] [I] Calibration:
|
| 19 |
+
[01/20/2026-07:02:51] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-07:02:51] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-07:02:51] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-07:02:51] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-07:02:51] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-07:02:51] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-07:02:51] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-07:02:51] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-07:02:51] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-07:02:51] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-07:02:51] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-07:02:51] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-07:02:51] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-07:02:51] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-07:02:51] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-07:02:51] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-07:02:51] [I] Save engine: checkpoints/deimv2_dinov3_x_coco.engine
|
| 36 |
+
[01/20/2026-07:02:51] [I] Load engine:
|
| 37 |
+
[01/20/2026-07:02:51] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-07:02:51] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-07:02:51] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-07:02:51] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-07:02:51] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-07:02:51] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-07:02:51] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-07:02:51] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-07:02:51] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-07:02:51] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-07:02:51] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-07:02:51] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-07:02:51] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-07:02:51] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-07:02:51] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-07:02:51] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-07:02:51] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-07:02:51] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-07:02:51] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-07:02:51] [I] Input build shape (profile 0): images=1x3x640x640+1x3x640x640+1x3x640x640
|
| 57 |
+
[01/20/2026-07:02:51] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-07:02:51] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-07:02:51] [I] === System Options ===
|
| 60 |
+
[01/20/2026-07:02:51] [I] Device: 0
|
| 61 |
+
[01/20/2026-07:02:51] [I] DLACore:
|
| 62 |
+
[01/20/2026-07:02:51] [I] Plugins:
|
| 63 |
+
[01/20/2026-07:02:51] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-07:02:51] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-07:02:51] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-07:02:51] [I]
|
| 67 |
+
[01/20/2026-07:02:51] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-07:02:51] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-07:02:51] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-07:02:51] [I] Input inference shape : images=1x3x640x640
|
| 71 |
+
[01/20/2026-07:02:51] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-07:02:51] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-07:02:51] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-07:02:51] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-07:02:51] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-07:02:51] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-07:02:51] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-07:02:51] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-07:02:51] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-07:02:51] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-07:02:51] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-07:02:51] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-07:02:51] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-07:02:51] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-07:02:51] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-07:02:51] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-07:02:51] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-07:02:51] [I] Inputs:
|
| 89 |
+
[01/20/2026-07:02:51] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-07:02:51] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-07:02:51] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-07:02:51] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-07:02:51] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-07:02:51] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-07:02:51] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-07:02:51] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-07:02:51] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-07:02:51] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-07:02:51] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-07:02:51] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-07:02:51] [I]
|
| 102 |
+
[01/20/2026-07:02:51] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-07:02:51] [I] Available Devices:
|
| 104 |
+
[01/20/2026-07:02:51] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-07:02:51] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-07:02:51] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-07:02:51] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-07:02:51] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-07:02:51] [I] SMs: 128
|
| 110 |
+
[01/20/2026-07:02:51] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-07:02:51] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-07:02:51] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-07:02:51] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-07:02:51] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-07:02:51] [I]
|
| 116 |
+
[01/20/2026-07:02:51] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-07:02:51] [I]
|
| 118 |
+
[01/20/2026-07:02:51] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-07:02:51] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-07:02:51] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 10549 (MiB)
|
| 121 |
+
[01/20/2026-07:02:51] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-07:02:52] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-07:02:52] [I] [TRT] Input filename: checkpoints/deimv2_dinov3_x_coco.onnx
|
| 124 |
+
[01/20/2026-07:02:52] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-07:02:52] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-07:02:52] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-07:02:52] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-07:02:52] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-07:02:52] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-07:02:52] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-07:02:52] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-07:02:52] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-07:02:52] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-07:02:52] [I] Finished parsing network model. Parse time: 0.237921
|
| 135 |
+
[01/20/2026-07:02:52] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x640x640 OPT=1x3x640x640 MAX=1x3x640x640
|
| 136 |
+
[01/20/2026-07:02:52] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-07:02:52] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +203, GPU +4, now: CPU 735, GPU 10553 (MiB)
|
| 138 |
+
[01/20/2026-07:02:52] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-07:02:52] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-07:02:52] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-07:03:33] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-07:05:25] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-07:05:26] [I] [TRT] Total Host Persistent Memory: 376608 bytes
|
| 144 |
+
[01/20/2026-07:05:26] [I] [TRT] Total Device Persistent Memory: 1024 bytes
|
| 145 |
+
[01/20/2026-07:05:26] [I] [TRT] Max Scratch Memory: 20131840 bytes
|
| 146 |
+
[01/20/2026-07:05:26] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 107 steps to complete.
|
| 147 |
+
[01/20/2026-07:05:26] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 4.29572ms to assign 11 blocks to 107 nodes requiring 45025792 bytes.
|
| 148 |
+
[01/20/2026-07:05:26] [I] [TRT] Total Activation Memory: 45025792 bytes
|
| 149 |
+
[01/20/2026-07:05:26] [I] [TRT] Total Weights Memory: 100957696 bytes
|
| 150 |
+
[01/20/2026-07:05:26] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-07:05:26] [I] [TRT] Engine generation completed in 154.472 seconds.
|
| 152 |
+
[01/20/2026-07:05:26] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 54 MiB, GPU 255 MiB
|
| 153 |
+
[01/20/2026-07:05:27] [I] Created engine with size: 100.555 MiB
|
| 154 |
+
[01/20/2026-07:05:27] [I] Engine built in 154.898 sec.
|
| 155 |
+
[01/20/2026-07:05:27] [I] [TRT] Loaded engine size: 100 MiB
|
| 156 |
+
[01/20/2026-07:05:27] [I] Engine deserialized in 0.0311196 sec.
|
| 157 |
+
[01/20/2026-07:05:27] [I] [TRT] [MS] Running engine with multi stream info
|
| 158 |
+
[01/20/2026-07:05:27] [I] [TRT] [MS] Number of aux streams is 3
|
| 159 |
+
[01/20/2026-07:05:27] [I] [TRT] [MS] Number of total worker streams is 4
|
| 160 |
+
[01/20/2026-07:05:27] [I] [TRT] [MS] The main stream provided by execute/enqueue calls is the first worker stream
|
| 161 |
+
[01/20/2026-07:05:27] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +43, now: CPU 0, GPU 139 (MiB)
|
| 162 |
+
[01/20/2026-07:05:27] [I] Setting persistentCacheLimit to 0 bytes.
|
| 163 |
+
[01/20/2026-07:05:27] [I] Created execution context with device memory size: 42.9399 MiB
|
| 164 |
+
[01/20/2026-07:05:27] [I] Using random values for input images
|
| 165 |
+
[01/20/2026-07:05:27] [I] Input binding for images with dimensions 1x3x640x640 is created.
|
| 166 |
+
[01/20/2026-07:05:27] [I] Using random values for input orig_target_sizes
|
| 167 |
+
[01/20/2026-07:05:27] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 168 |
+
[01/20/2026-07:05:27] [I] Output binding for labels with dimensions 1x300 is created.
|
| 169 |
+
[01/20/2026-07:05:27] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 170 |
+
[01/20/2026-07:05:27] [I] Output binding for scores with dimensions 1x300 is created.
|
| 171 |
+
[01/20/2026-07:05:27] [I] Starting inference
|
| 172 |
+
[01/20/2026-07:05:30] [I] Warmup completed 66 queries over 200 ms
|
| 173 |
+
[01/20/2026-07:05:30] [I] Timing trace has 1059 queries over 3.00739 s
|
| 174 |
+
[01/20/2026-07:05:30] [I]
|
| 175 |
+
[01/20/2026-07:05:30] [I] === Trace details ===
|
| 176 |
+
[01/20/2026-07:05:30] [I] Trace averages of 10 runs:
|
| 177 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83336 ms - Host latency: 3.07233 ms (enqueue 0.701578 ms)
|
| 178 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83168 ms - Host latency: 3.07287 ms (enqueue 0.6114 ms)
|
| 179 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83374 ms - Host latency: 3.07555 ms (enqueue 0.580649 ms)
|
| 180 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83187 ms - Host latency: 3.07226 ms (enqueue 0.58468 ms)
|
| 181 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83537 ms - Host latency: 3.07763 ms (enqueue 0.583014 ms)
|
| 182 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83669 ms - Host latency: 3.07737 ms (enqueue 0.57963 ms)
|
| 183 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83434 ms - Host latency: 3.07546 ms (enqueue 0.565479 ms)
|
| 184 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83679 ms - Host latency: 3.07714 ms (enqueue 0.915753 ms)
|
| 185 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83261 ms - Host latency: 3.07397 ms (enqueue 0.606302 ms)
|
| 186 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83498 ms - Host latency: 3.076 ms (enqueue 0.579776 ms)
|
| 187 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83251 ms - Host latency: 3.07282 ms (enqueue 0.568085 ms)
|
| 188 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83575 ms - Host latency: 3.07682 ms (enqueue 0.559 ms)
|
| 189 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83212 ms - Host latency: 3.07352 ms (enqueue 0.566913 ms)
|
| 190 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83415 ms - Host latency: 3.07585 ms (enqueue 0.554803 ms)
|
| 191 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83711 ms - Host latency: 3.07679 ms (enqueue 0.602917 ms)
|
| 192 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83541 ms - Host latency: 3.07644 ms (enqueue 0.566254 ms)
|
| 193 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83577 ms - Host latency: 3.07631 ms (enqueue 0.573004 ms)
|
| 194 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83618 ms - Host latency: 3.0764 ms (enqueue 0.565643 ms)
|
| 195 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83678 ms - Host latency: 3.07716 ms (enqueue 0.562018 ms)
|
| 196 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83444 ms - Host latency: 3.0758 ms (enqueue 0.561792 ms)
|
| 197 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83307 ms - Host latency: 3.07421 ms (enqueue 0.552844 ms)
|
| 198 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8335 ms - Host latency: 3.07266 ms (enqueue 0.841644 ms)
|
| 199 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83771 ms - Host latency: 3.07906 ms (enqueue 0.563574 ms)
|
| 200 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83376 ms - Host latency: 3.0748 ms (enqueue 0.557874 ms)
|
| 201 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83472 ms - Host latency: 3.07563 ms (enqueue 0.555933 ms)
|
| 202 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83764 ms - Host latency: 3.0792 ms (enqueue 0.560291 ms)
|
| 203 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83876 ms - Host latency: 3.08054 ms (enqueue 0.561816 ms)
|
| 204 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83506 ms - Host latency: 3.07485 ms (enqueue 0.546808 ms)
|
| 205 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83537 ms - Host latency: 3.07655 ms (enqueue 0.562311 ms)
|
| 206 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83401 ms - Host latency: 3.0748 ms (enqueue 0.553601 ms)
|
| 207 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83458 ms - Host latency: 3.07444 ms (enqueue 0.554346 ms)
|
| 208 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83433 ms - Host latency: 3.07482 ms (enqueue 0.549353 ms)
|
| 209 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8351 ms - Host latency: 3.07762 ms (enqueue 0.560791 ms)
|
| 210 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83547 ms - Host latency: 3.07562 ms (enqueue 0.546594 ms)
|
| 211 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8327 ms - Host latency: 3.07433 ms (enqueue 0.547229 ms)
|
| 212 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83512 ms - Host latency: 3.07487 ms (enqueue 0.569556 ms)
|
| 213 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.835 ms - Host latency: 3.07515 ms (enqueue 0.549707 ms)
|
| 214 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83547 ms - Host latency: 3.07701 ms (enqueue 0.586328 ms)
|
| 215 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.837 ms - Host latency: 3.07726 ms (enqueue 0.626392 ms)
|
| 216 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83579 ms - Host latency: 3.07651 ms (enqueue 0.560889 ms)
|
| 217 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83383 ms - Host latency: 3.07456 ms (enqueue 0.550244 ms)
|
| 218 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83232 ms - Host latency: 3.07349 ms (enqueue 0.55011 ms)
|
| 219 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83391 ms - Host latency: 3.0741 ms (enqueue 0.55332 ms)
|
| 220 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83215 ms - Host latency: 3.07223 ms (enqueue 0.570496 ms)
|
| 221 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83461 ms - Host latency: 3.07531 ms (enqueue 0.611377 ms)
|
| 222 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83903 ms - Host latency: 3.08014 ms (enqueue 0.648914 ms)
|
| 223 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83828 ms - Host latency: 3.07887 ms (enqueue 0.61167 ms)
|
| 224 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83721 ms - Host latency: 3.07781 ms (enqueue 0.598376 ms)
|
| 225 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83433 ms - Host latency: 3.07522 ms (enqueue 0.574182 ms)
|
| 226 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83784 ms - Host latency: 3.07988 ms (enqueue 0.57605 ms)
|
| 227 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8355 ms - Host latency: 3.07539 ms (enqueue 0.767432 ms)
|
| 228 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8395 ms - Host latency: 3.08134 ms (enqueue 0.605664 ms)
|
| 229 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83661 ms - Host latency: 3.0766 ms (enqueue 0.590234 ms)
|
| 230 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.836 ms - Host latency: 3.07712 ms (enqueue 0.579431 ms)
|
| 231 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8364 ms - Host latency: 3.07605 ms (enqueue 0.597498 ms)
|
| 232 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83751 ms - Host latency: 3.07806 ms (enqueue 0.824365 ms)
|
| 233 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.84104 ms - Host latency: 3.08406 ms (enqueue 0.769312 ms)
|
| 234 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83573 ms - Host latency: 3.07745 ms (enqueue 0.704712 ms)
|
| 235 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83588 ms - Host latency: 3.07727 ms (enqueue 0.593005 ms)
|
| 236 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83518 ms - Host latency: 3.07616 ms (enqueue 0.577429 ms)
|
| 237 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83756 ms - Host latency: 3.07815 ms (enqueue 0.736743 ms)
|
| 238 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8374 ms - Host latency: 3.07781 ms (enqueue 0.592505 ms)
|
| 239 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83972 ms - Host latency: 3.08015 ms (enqueue 0.628516 ms)
|
| 240 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.84054 ms - Host latency: 3.08188 ms (enqueue 0.596912 ms)
|
| 241 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83979 ms - Host latency: 3.08151 ms (enqueue 0.584668 ms)
|
| 242 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83789 ms - Host latency: 3.07964 ms (enqueue 0.565247 ms)
|
| 243 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83699 ms - Host latency: 3.07812 ms (enqueue 0.556909 ms)
|
| 244 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.84001 ms - Host latency: 3.08103 ms (enqueue 0.646191 ms)
|
| 245 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83679 ms - Host latency: 3.07761 ms (enqueue 0.59978 ms)
|
| 246 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8342 ms - Host latency: 3.07512 ms (enqueue 0.548999 ms)
|
| 247 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83394 ms - Host latency: 3.07529 ms (enqueue 0.549609 ms)
|
| 248 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83601 ms - Host latency: 3.07656 ms (enqueue 0.560278 ms)
|
| 249 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83389 ms - Host latency: 3.07397 ms (enqueue 0.571289 ms)
|
| 250 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83408 ms - Host latency: 3.07483 ms (enqueue 0.553686 ms)
|
| 251 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83159 ms - Host latency: 3.07146 ms (enqueue 0.548755 ms)
|
| 252 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83374 ms - Host latency: 3.07417 ms (enqueue 0.547095 ms)
|
| 253 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83606 ms - Host latency: 3.07732 ms (enqueue 0.540796 ms)
|
| 254 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83538 ms - Host latency: 3.07573 ms (enqueue 0.544482 ms)
|
| 255 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83579 ms - Host latency: 3.07656 ms (enqueue 0.556934 ms)
|
| 256 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83499 ms - Host latency: 3.07476 ms (enqueue 0.566772 ms)
|
| 257 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83511 ms - Host latency: 3.0771 ms (enqueue 0.565356 ms)
|
| 258 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83394 ms - Host latency: 3.07466 ms (enqueue 0.549658 ms)
|
| 259 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83574 ms - Host latency: 3.07695 ms (enqueue 0.558032 ms)
|
| 260 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83577 ms - Host latency: 3.0759 ms (enqueue 0.541553 ms)
|
| 261 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83303 ms - Host latency: 3.07288 ms (enqueue 0.549805 ms)
|
| 262 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8356 ms - Host latency: 3.07651 ms (enqueue 0.564941 ms)
|
| 263 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8386 ms - Host latency: 3.07976 ms (enqueue 0.546021 ms)
|
| 264 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83457 ms - Host latency: 3.07576 ms (enqueue 0.545605 ms)
|
| 265 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83643 ms - Host latency: 3.07664 ms (enqueue 0.535596 ms)
|
| 266 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83413 ms - Host latency: 3.07429 ms (enqueue 0.540967 ms)
|
| 267 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83479 ms - Host latency: 3.07485 ms (enqueue 0.557007 ms)
|
| 268 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83423 ms - Host latency: 3.07478 ms (enqueue 0.538745 ms)
|
| 269 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83469 ms - Host latency: 3.07603 ms (enqueue 0.535815 ms)
|
| 270 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.8321 ms - Host latency: 3.07302 ms (enqueue 0.54021 ms)
|
| 271 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83604 ms - Host latency: 3.07712 ms (enqueue 0.540405 ms)
|
| 272 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83286 ms - Host latency: 3.07415 ms (enqueue 0.53977 ms)
|
| 273 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83679 ms - Host latency: 3.07815 ms (enqueue 0.541943 ms)
|
| 274 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83728 ms - Host latency: 3.07744 ms (enqueue 0.585742 ms)
|
| 275 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83982 ms - Host latency: 3.08137 ms (enqueue 0.593237 ms)
|
| 276 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83772 ms - Host latency: 3.07883 ms (enqueue 0.568213 ms)
|
| 277 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83499 ms - Host latency: 3.07654 ms (enqueue 0.556763 ms)
|
| 278 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83828 ms - Host latency: 3.07947 ms (enqueue 0.555713 ms)
|
| 279 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83884 ms - Host latency: 3.079 ms (enqueue 0.78623 ms)
|
| 280 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83235 ms - Host latency: 3.07083 ms (enqueue 1.00188 ms)
|
| 281 |
+
[01/20/2026-07:05:30] [I] Average on 10 runs - GPU latency: 2.83823 ms - Host latency: 3.08057 ms (enqueue 0.596899 ms)
|
| 282 |
+
[01/20/2026-07:05:30] [I]
|
| 283 |
+
[01/20/2026-07:05:30] [I] === Performance summary ===
|
| 284 |
+
[01/20/2026-07:05:30] [I] Throughput: 352.132 qps
|
| 285 |
+
[01/20/2026-07:05:30] [I] Latency: min = 2.91431 ms, max = 3.09717 ms, mean = 3.07624 ms, median = 3.07642 ms, percentile(90%) = 3.08423 ms, percentile(95%) = 3.08667 ms, percentile(99%) = 3.09033 ms
|
| 286 |
+
[01/20/2026-07:05:30] [I] Enqueue Time: min = 0.532471 ms, max = 1.69238 ms, mean = 0.590915 ms, median = 0.56012 ms, percentile(90%) = 0.625488 ms, percentile(95%) = 0.673096 ms, percentile(99%) = 1.30176 ms
|
| 287 |
+
[01/20/2026-07:05:30] [I] H2D Latency: min = 0.226257 ms, max = 0.238525 ms, mean = 0.233766 ms, median = 0.233887 ms, percentile(90%) = 0.235413 ms, percentile(95%) = 0.235901 ms, percentile(99%) = 0.237183 ms
|
| 288 |
+
[01/20/2026-07:05:30] [I] GPU Compute Time: min = 2.67676 ms, max = 2.85498 ms, mean = 2.83542 ms, median = 2.83545 ms, percentile(90%) = 2.84277 ms, percentile(95%) = 2.84473 ms, percentile(99%) = 2.8479 ms
|
| 289 |
+
[01/20/2026-07:05:30] [I] D2H Latency: min = 0.00415039 ms, max = 0.00939941 ms, mean = 0.00705897 ms, median = 0.00744629 ms, percentile(90%) = 0.00878906 ms, percentile(95%) = 0.0090332 ms, percentile(99%) = 0.00927734 ms
|
| 290 |
+
[01/20/2026-07:05:30] [I] Total Host Walltime: 3.00739 s
|
| 291 |
+
[01/20/2026-07:05:30] [I] Total GPU Compute Time: 3.00271 s
|
| 292 |
+
[01/20/2026-07:05:30] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 293 |
+
[01/20/2026-07:05:30] [I]
|
| 294 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_dinov3_x_coco.onnx --saveEngine=checkpoints/deimv2_dinov3_x_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_dinov3_x_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f470905b1d2e993023e85fde6164a9cba18da3a8480e05de99299a9755b8824
|
| 3 |
+
size 202566568
|
deimv2_dinov3_x_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8b79dc9a619d2cbf94b712f705e0a4ace8c44d6304be54ddfcd6f4a85c45b039
|
| 3 |
+
size 205581424
|
deimv2_hgnetv2_atto_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a78a3067184649b8e7a01e8e36f8ac4c66fc8131ecd0ba09e7b9e9fe13438c0
|
| 3 |
+
size 3709300
|
deimv2_hgnetv2_atto_coco.log
ADDED
|
@@ -0,0 +1,586 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_atto_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_atto_coco.engine --fp16 --optShapes=images:1x3x320x320,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-06:47:47] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-06:47:47] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-06:47:47] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-06:47:47] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-06:47:47] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-06:47:47] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-06:47:47] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-06:47:47] [I] Model: checkpoints/deimv2_hgnetv2_atto_coco.onnx
|
| 10 |
+
[01/20/2026-06:47:47] [I] Output:
|
| 11 |
+
[01/20/2026-06:47:47] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-06:47:47] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-06:47:47] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-06:47:47] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-06:47:47] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-06:47:47] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-06:47:47] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-06:47:47] [I] Calibration:
|
| 19 |
+
[01/20/2026-06:47:47] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-06:47:47] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-06:47:47] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-06:47:47] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-06:47:47] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-06:47:47] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-06:47:47] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-06:47:47] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-06:47:47] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-06:47:47] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-06:47:47] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-06:47:47] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-06:47:47] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-06:47:47] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-06:47:47] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-06:47:47] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-06:47:47] [I] Save engine: checkpoints/deimv2_hgnetv2_atto_coco.engine
|
| 36 |
+
[01/20/2026-06:47:47] [I] Load engine:
|
| 37 |
+
[01/20/2026-06:47:47] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-06:47:47] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-06:47:47] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-06:47:47] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-06:47:47] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-06:47:47] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-06:47:47] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-06:47:47] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-06:47:47] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-06:47:47] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-06:47:47] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-06:47:47] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-06:47:47] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-06:47:47] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-06:47:47] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-06:47:47] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-06:47:47] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-06:47:47] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-06:47:47] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-06:47:47] [I] Input build shape (profile 0): images=1x3x320x320+1x3x320x320+1x3x320x320
|
| 57 |
+
[01/20/2026-06:47:47] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-06:47:47] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-06:47:47] [I] === System Options ===
|
| 60 |
+
[01/20/2026-06:47:47] [I] Device: 0
|
| 61 |
+
[01/20/2026-06:47:47] [I] DLACore:
|
| 62 |
+
[01/20/2026-06:47:47] [I] Plugins:
|
| 63 |
+
[01/20/2026-06:47:47] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-06:47:47] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-06:47:47] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-06:47:47] [I]
|
| 67 |
+
[01/20/2026-06:47:47] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-06:47:47] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-06:47:47] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-06:47:47] [I] Input inference shape : images=1x3x320x320
|
| 71 |
+
[01/20/2026-06:47:47] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-06:47:47] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-06:47:47] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-06:47:47] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-06:47:47] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-06:47:47] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-06:47:47] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-06:47:47] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-06:47:47] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-06:47:47] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-06:47:47] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-06:47:47] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-06:47:47] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-06:47:47] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-06:47:47] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-06:47:47] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-06:47:47] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-06:47:47] [I] Inputs:
|
| 89 |
+
[01/20/2026-06:47:47] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-06:47:47] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-06:47:47] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-06:47:47] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-06:47:47] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-06:47:47] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-06:47:47] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-06:47:47] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-06:47:47] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-06:47:47] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-06:47:47] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-06:47:47] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-06:47:47] [I]
|
| 102 |
+
[01/20/2026-06:47:47] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-06:47:47] [I] Available Devices:
|
| 104 |
+
[01/20/2026-06:47:47] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-06:47:47] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-06:47:47] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-06:47:47] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-06:47:47] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-06:47:47] [I] SMs: 128
|
| 110 |
+
[01/20/2026-06:47:47] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-06:47:47] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-06:47:47] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-06:47:47] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-06:47:47] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-06:47:47] [I]
|
| 116 |
+
[01/20/2026-06:47:47] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-06:47:47] [I]
|
| 118 |
+
[01/20/2026-06:47:47] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-06:47:47] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-06:47:47] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 10547 (MiB)
|
| 121 |
+
[01/20/2026-06:47:47] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-06:47:47] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-06:47:47] [I] [TRT] Input filename: checkpoints/deimv2_hgnetv2_atto_coco.onnx
|
| 124 |
+
[01/20/2026-06:47:47] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-06:47:47] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-06:47:47] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-06:47:47] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-06:47:47] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-06:47:47] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-06:47:47] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-06:47:47] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-06:47:47] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-06:47:47] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-06:47:47] [I] Finished parsing network model. Parse time: 0.0226951
|
| 135 |
+
[01/20/2026-06:47:47] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x320x320 OPT=1x3x320x320 MAX=1x3x320x320
|
| 136 |
+
[01/20/2026-06:47:47] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-06:47:48] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +205, GPU +4, now: CPU 513, GPU 10551 (MiB)
|
| 138 |
+
[01/20/2026-06:47:48] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-06:47:48] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-06:47:48] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-06:48:52] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-06:49:21] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-06:49:22] [I] [TRT] Total Host Persistent Memory: 309840 bytes
|
| 144 |
+
[01/20/2026-06:49:22] [I] [TRT] Total Device Persistent Memory: 0 bytes
|
| 145 |
+
[01/20/2026-06:49:22] [I] [TRT] Max Scratch Memory: 549888 bytes
|
| 146 |
+
[01/20/2026-06:49:22] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 94 steps to complete.
|
| 147 |
+
[01/20/2026-06:49:22] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 1.7421ms to assign 10 blocks to 94 nodes requiring 3110400 bytes.
|
| 148 |
+
[01/20/2026-06:49:22] [I] [TRT] Total Activation Memory: 3110400 bytes
|
| 149 |
+
[01/20/2026-06:49:22] [I] [TRT] Total Weights Memory: 1056384 bytes
|
| 150 |
+
[01/20/2026-06:49:22] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-06:49:22] [I] [TRT] Engine generation completed in 93.9575 seconds.
|
| 152 |
+
[01/20/2026-06:49:22] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 0 MiB, GPU 7 MiB
|
| 153 |
+
[01/20/2026-06:49:22] [I] Created engine with size: 3.53746 MiB
|
| 154 |
+
[01/20/2026-06:49:22] [I] Engine built in 94.1877 sec.
|
| 155 |
+
[01/20/2026-06:49:22] [I] [TRT] Loaded engine size: 3 MiB
|
| 156 |
+
[01/20/2026-06:49:22] [I] Engine deserialized in 0.00877616 sec.
|
| 157 |
+
[01/20/2026-06:49:22] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +2, now: CPU 0, GPU 3 (MiB)
|
| 158 |
+
[01/20/2026-06:49:22] [I] Setting persistentCacheLimit to 0 bytes.
|
| 159 |
+
[01/20/2026-06:49:22] [I] Created execution context with device memory size: 2.96631 MiB
|
| 160 |
+
[01/20/2026-06:49:22] [I] Using random values for input images
|
| 161 |
+
[01/20/2026-06:49:22] [I] Input binding for images with dimensions 1x3x320x320 is created.
|
| 162 |
+
[01/20/2026-06:49:22] [I] Using random values for input orig_target_sizes
|
| 163 |
+
[01/20/2026-06:49:22] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 164 |
+
[01/20/2026-06:49:22] [I] Output binding for labels with dimensions 1x300 is created.
|
| 165 |
+
[01/20/2026-06:49:22] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 166 |
+
[01/20/2026-06:49:22] [I] Output binding for scores with dimensions 1x300 is created.
|
| 167 |
+
[01/20/2026-06:49:22] [I] Starting inference
|
| 168 |
+
[01/20/2026-06:49:25] [I] Warmup completed 410 queries over 200 ms
|
| 169 |
+
[01/20/2026-06:49:25] [I] Timing trace has 6159 queries over 3.00145 s
|
| 170 |
+
[01/20/2026-06:49:25] [I]
|
| 171 |
+
[01/20/2026-06:49:25] [I] === Trace details ===
|
| 172 |
+
[01/20/2026-06:49:25] [I] Trace averages of 10 runs:
|
| 173 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483493 ms - Host latency: 0.550644 ms (enqueue 0.237411 ms)
|
| 174 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483664 ms - Host latency: 0.551686 ms (enqueue 0.235919 ms)
|
| 175 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483232 ms - Host latency: 0.551836 ms (enqueue 0.237135 ms)
|
| 176 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483195 ms - Host latency: 0.551399 ms (enqueue 0.237595 ms)
|
| 177 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483177 ms - Host latency: 0.550945 ms (enqueue 0.230566 ms)
|
| 178 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483125 ms - Host latency: 0.551932 ms (enqueue 0.235378 ms)
|
| 179 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483189 ms - Host latency: 0.552023 ms (enqueue 0.235298 ms)
|
| 180 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483739 ms - Host latency: 0.551956 ms (enqueue 0.229178 ms)
|
| 181 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48336 ms - Host latency: 0.5521 ms (enqueue 0.228499 ms)
|
| 182 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482953 ms - Host latency: 0.551534 ms (enqueue 0.230544 ms)
|
| 183 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48364 ms - Host latency: 0.550916 ms (enqueue 0.241107 ms)
|
| 184 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483235 ms - Host latency: 0.549185 ms (enqueue 0.250107 ms)
|
| 185 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483377 ms - Host latency: 0.551788 ms (enqueue 0.239841 ms)
|
| 186 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483148 ms - Host latency: 0.550397 ms (enqueue 0.237457 ms)
|
| 187 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.4832 ms - Host latency: 0.551169 ms (enqueue 0.240268 ms)
|
| 188 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482858 ms - Host latency: 0.550052 ms (enqueue 0.241885 ms)
|
| 189 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483218 ms - Host latency: 0.551089 ms (enqueue 0.237079 ms)
|
| 190 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483362 ms - Host latency: 0.551367 ms (enqueue 0.236578 ms)
|
| 191 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483371 ms - Host latency: 0.55202 ms (enqueue 0.238092 ms)
|
| 192 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482999 ms - Host latency: 0.55144 ms (enqueue 0.235803 ms)
|
| 193 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483487 ms - Host latency: 0.552356 ms (enqueue 0.236642 ms)
|
| 194 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483508 ms - Host latency: 0.550616 ms (enqueue 0.242563 ms)
|
| 195 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48309 ms - Host latency: 0.551785 ms (enqueue 0.236868 ms)
|
| 196 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483716 ms - Host latency: 0.552423 ms (enqueue 0.237598 ms)
|
| 197 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483661 ms - Host latency: 0.550696 ms (enqueue 0.236258 ms)
|
| 198 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483801 ms - Host latency: 0.5487 ms (enqueue 0.238522 ms)
|
| 199 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483105 ms - Host latency: 0.550406 ms (enqueue 0.240771 ms)
|
| 200 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48378 ms - Host latency: 0.551852 ms (enqueue 0.241187 ms)
|
| 201 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483493 ms - Host latency: 0.551849 ms (enqueue 0.242581 ms)
|
| 202 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483362 ms - Host latency: 0.551971 ms (enqueue 0.236264 ms)
|
| 203 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483414 ms - Host latency: 0.551099 ms (enqueue 0.242188 ms)
|
| 204 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483173 ms - Host latency: 0.551962 ms (enqueue 0.240903 ms)
|
| 205 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483618 ms - Host latency: 0.551672 ms (enqueue 0.235983 ms)
|
| 206 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483337 ms - Host latency: 0.551862 ms (enqueue 0.235315 ms)
|
| 207 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482913 ms - Host latency: 0.550668 ms (enqueue 0.235211 ms)
|
| 208 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483322 ms - Host latency: 0.54957 ms (enqueue 0.235352 ms)
|
| 209 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48327 ms - Host latency: 0.550952 ms (enqueue 0.236246 ms)
|
| 210 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483545 ms - Host latency: 0.551862 ms (enqueue 0.230112 ms)
|
| 211 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483057 ms - Host latency: 0.551337 ms (enqueue 0.22905 ms)
|
| 212 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483411 ms - Host latency: 0.55249 ms (enqueue 0.235843 ms)
|
| 213 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483932 ms - Host latency: 0.552423 ms (enqueue 0.230072 ms)
|
| 214 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483499 ms - Host latency: 0.551916 ms (enqueue 0.232037 ms)
|
| 215 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483261 ms - Host latency: 0.55191 ms (enqueue 0.23385 ms)
|
| 216 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483575 ms - Host latency: 0.552084 ms (enqueue 0.23002 ms)
|
| 217 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483646 ms - Host latency: 0.552176 ms (enqueue 0.229797 ms)
|
| 218 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483408 ms - Host latency: 0.552032 ms (enqueue 0.228992 ms)
|
| 219 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48367 ms - Host latency: 0.552097 ms (enqueue 0.228949 ms)
|
| 220 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483118 ms - Host latency: 0.549811 ms (enqueue 0.243063 ms)
|
| 221 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483426 ms - Host latency: 0.551538 ms (enqueue 0.234665 ms)
|
| 222 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.55159 ms (enqueue 0.233054 ms)
|
| 223 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48374 ms - Host latency: 0.551102 ms (enqueue 0.232806 ms)
|
| 224 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483316 ms - Host latency: 0.551147 ms (enqueue 0.230063 ms)
|
| 225 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483566 ms - Host latency: 0.550214 ms (enqueue 0.237573 ms)
|
| 226 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483142 ms - Host latency: 0.548727 ms (enqueue 0.235394 ms)
|
| 227 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483969 ms - Host latency: 0.552365 ms (enqueue 0.238025 ms)
|
| 228 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483621 ms - Host latency: 0.550256 ms (enqueue 0.232867 ms)
|
| 229 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483463 ms - Host latency: 0.551761 ms (enqueue 0.232382 ms)
|
| 230 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483713 ms - Host latency: 0.551727 ms (enqueue 0.231509 ms)
|
| 231 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483459 ms - Host latency: 0.551248 ms (enqueue 0.231976 ms)
|
| 232 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483405 ms - Host latency: 0.551947 ms (enqueue 0.231641 ms)
|
| 233 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483322 ms - Host latency: 0.552026 ms (enqueue 0.230862 ms)
|
| 234 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483606 ms - Host latency: 0.551801 ms (enqueue 0.23371 ms)
|
| 235 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483475 ms - Host latency: 0.551251 ms (enqueue 0.244135 ms)
|
| 236 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484045 ms - Host latency: 0.552402 ms (enqueue 0.232333 ms)
|
| 237 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48396 ms - Host latency: 0.551007 ms (enqueue 0.239896 ms)
|
| 238 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483874 ms - Host latency: 0.552319 ms (enqueue 0.230249 ms)
|
| 239 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483582 ms - Host latency: 0.552106 ms (enqueue 0.230695 ms)
|
| 240 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.551569 ms (enqueue 0.235895 ms)
|
| 241 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483417 ms - Host latency: 0.551843 ms (enqueue 0.231793 ms)
|
| 242 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483923 ms - Host latency: 0.552203 ms (enqueue 0.230969 ms)
|
| 243 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483551 ms - Host latency: 0.55119 ms (enqueue 0.233881 ms)
|
| 244 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483087 ms - Host latency: 0.551062 ms (enqueue 0.235449 ms)
|
| 245 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.552264 ms (enqueue 0.2362 ms)
|
| 246 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483478 ms - Host latency: 0.550897 ms (enqueue 0.231531 ms)
|
| 247 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483679 ms - Host latency: 0.552588 ms (enqueue 0.230682 ms)
|
| 248 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482996 ms - Host latency: 0.550311 ms (enqueue 0.236902 ms)
|
| 249 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48316 ms - Host latency: 0.548242 ms (enqueue 0.250195 ms)
|
| 250 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483826 ms - Host latency: 0.550616 ms (enqueue 0.239978 ms)
|
| 251 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483087 ms - Host latency: 0.548212 ms (enqueue 0.246417 ms)
|
| 252 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482916 ms - Host latency: 0.546899 ms (enqueue 0.260919 ms)
|
| 253 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483289 ms - Host latency: 0.551581 ms (enqueue 0.239581 ms)
|
| 254 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483221 ms - Host latency: 0.551019 ms (enqueue 0.235217 ms)
|
| 255 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48371 ms - Host latency: 0.549835 ms (enqueue 0.257031 ms)
|
| 256 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483514 ms - Host latency: 0.552057 ms (enqueue 0.238538 ms)
|
| 257 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483539 ms - Host latency: 0.551801 ms (enqueue 0.234943 ms)
|
| 258 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483649 ms - Host latency: 0.552356 ms (enqueue 0.237628 ms)
|
| 259 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483478 ms - Host latency: 0.5526 ms (enqueue 0.249567 ms)
|
| 260 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483594 ms - Host latency: 0.550323 ms (enqueue 0.26394 ms)
|
| 261 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483136 ms - Host latency: 0.551611 ms (enqueue 0.239789 ms)
|
| 262 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483148 ms - Host latency: 0.550983 ms (enqueue 0.24447 ms)
|
| 263 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482843 ms - Host latency: 0.551227 ms (enqueue 0.235712 ms)
|
| 264 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483435 ms - Host latency: 0.551935 ms (enqueue 0.231561 ms)
|
| 265 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483472 ms - Host latency: 0.551257 ms (enqueue 0.273175 ms)
|
| 266 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.486182 ms - Host latency: 0.552386 ms (enqueue 0.350195 ms)
|
| 267 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483203 ms - Host latency: 0.551056 ms (enqueue 0.239465 ms)
|
| 268 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483368 ms - Host latency: 0.549957 ms (enqueue 0.234772 ms)
|
| 269 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484204 ms - Host latency: 0.552661 ms (enqueue 0.241553 ms)
|
| 270 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483594 ms - Host latency: 0.550549 ms (enqueue 0.235034 ms)
|
| 271 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.4836 ms - Host latency: 0.552002 ms (enqueue 0.230023 ms)
|
| 272 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483746 ms - Host latency: 0.552136 ms (enqueue 0.231274 ms)
|
| 273 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483551 ms - Host latency: 0.55199 ms (enqueue 0.23186 ms)
|
| 274 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483636 ms - Host latency: 0.552106 ms (enqueue 0.233057 ms)
|
| 275 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483453 ms - Host latency: 0.551849 ms (enqueue 0.230365 ms)
|
| 276 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483624 ms - Host latency: 0.552106 ms (enqueue 0.230182 ms)
|
| 277 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.552032 ms (enqueue 0.231067 ms)
|
| 278 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483276 ms - Host latency: 0.551764 ms (enqueue 0.22948 ms)
|
| 279 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483856 ms - Host latency: 0.552447 ms (enqueue 0.243408 ms)
|
| 280 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483783 ms - Host latency: 0.551508 ms (enqueue 0.291119 ms)
|
| 281 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.499664 ms - Host latency: 0.565497 ms (enqueue 0.542895 ms)
|
| 282 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.492035 ms - Host latency: 0.558783 ms (enqueue 0.422308 ms)
|
| 283 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483447 ms - Host latency: 0.551215 ms (enqueue 0.235437 ms)
|
| 284 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483624 ms - Host latency: 0.552002 ms (enqueue 0.232642 ms)
|
| 285 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.55213 ms (enqueue 0.239392 ms)
|
| 286 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.552545 ms (enqueue 0.239325 ms)
|
| 287 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482739 ms - Host latency: 0.551154 ms (enqueue 0.23595 ms)
|
| 288 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482886 ms - Host latency: 0.549908 ms (enqueue 0.237085 ms)
|
| 289 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483643 ms - Host latency: 0.551483 ms (enqueue 0.238495 ms)
|
| 290 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483411 ms - Host latency: 0.550732 ms (enqueue 0.23858 ms)
|
| 291 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483435 ms - Host latency: 0.551788 ms (enqueue 0.244568 ms)
|
| 292 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48407 ms - Host latency: 0.552441 ms (enqueue 0.233093 ms)
|
| 293 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483496 ms - Host latency: 0.552185 ms (enqueue 0.230676 ms)
|
| 294 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483984 ms - Host latency: 0.552332 ms (enqueue 0.231989 ms)
|
| 295 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483386 ms - Host latency: 0.551733 ms (enqueue 0.229919 ms)
|
| 296 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483582 ms - Host latency: 0.551422 ms (enqueue 0.236633 ms)
|
| 297 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483691 ms - Host latency: 0.552094 ms (enqueue 0.23324 ms)
|
| 298 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483636 ms - Host latency: 0.551935 ms (enqueue 0.230835 ms)
|
| 299 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48371 ms - Host latency: 0.552264 ms (enqueue 0.229718 ms)
|
| 300 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482886 ms - Host latency: 0.550476 ms (enqueue 0.232477 ms)
|
| 301 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484186 ms - Host latency: 0.551404 ms (enqueue 0.30307 ms)
|
| 302 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483862 ms - Host latency: 0.550946 ms (enqueue 0.235144 ms)
|
| 303 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483331 ms - Host latency: 0.551685 ms (enqueue 0.22981 ms)
|
| 304 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483472 ms - Host latency: 0.551813 ms (enqueue 0.230005 ms)
|
| 305 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483594 ms - Host latency: 0.550458 ms (enqueue 0.364935 ms)
|
| 306 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.500037 ms - Host latency: 0.566278 ms (enqueue 0.490198 ms)
|
| 307 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483783 ms - Host latency: 0.552094 ms (enqueue 0.230377 ms)
|
| 308 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483307 ms - Host latency: 0.550427 ms (enqueue 0.240576 ms)
|
| 309 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483673 ms - Host latency: 0.550018 ms (enqueue 0.238806 ms)
|
| 310 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483582 ms - Host latency: 0.551599 ms (enqueue 0.312714 ms)
|
| 311 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.505884 ms - Host latency: 0.570703 ms (enqueue 0.547034 ms)
|
| 312 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.496564 ms - Host latency: 0.564081 ms (enqueue 0.537042 ms)
|
| 313 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.491406 ms - Host latency: 0.557794 ms (enqueue 0.363617 ms)
|
| 314 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483319 ms - Host latency: 0.551801 ms (enqueue 0.237073 ms)
|
| 315 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483777 ms - Host latency: 0.551874 ms (enqueue 0.236353 ms)
|
| 316 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483264 ms - Host latency: 0.552155 ms (enqueue 0.237836 ms)
|
| 317 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483075 ms - Host latency: 0.552069 ms (enqueue 0.24397 ms)
|
| 318 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483771 ms - Host latency: 0.551886 ms (enqueue 0.241467 ms)
|
| 319 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483588 ms - Host latency: 0.551642 ms (enqueue 0.241998 ms)
|
| 320 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483551 ms - Host latency: 0.551971 ms (enqueue 0.242389 ms)
|
| 321 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483209 ms - Host latency: 0.551099 ms (enqueue 0.231537 ms)
|
| 322 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483392 ms - Host latency: 0.550787 ms (enqueue 0.240814 ms)
|
| 323 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483148 ms - Host latency: 0.551886 ms (enqueue 0.238904 ms)
|
| 324 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483203 ms - Host latency: 0.550421 ms (enqueue 0.242639 ms)
|
| 325 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483075 ms - Host latency: 0.551874 ms (enqueue 0.242914 ms)
|
| 326 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.552478 ms (enqueue 0.238898 ms)
|
| 327 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482941 ms - Host latency: 0.551477 ms (enqueue 0.240027 ms)
|
| 328 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483167 ms - Host latency: 0.550494 ms (enqueue 0.240094 ms)
|
| 329 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483661 ms - Host latency: 0.549585 ms (enqueue 0.241138 ms)
|
| 330 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483636 ms - Host latency: 0.55199 ms (enqueue 0.237109 ms)
|
| 331 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483301 ms - Host latency: 0.550751 ms (enqueue 0.243207 ms)
|
| 332 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483649 ms - Host latency: 0.550836 ms (enqueue 0.244067 ms)
|
| 333 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483496 ms - Host latency: 0.549878 ms (enqueue 0.242584 ms)
|
| 334 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483508 ms - Host latency: 0.55163 ms (enqueue 0.238141 ms)
|
| 335 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483508 ms - Host latency: 0.550812 ms (enqueue 0.24259 ms)
|
| 336 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483099 ms - Host latency: 0.550452 ms (enqueue 0.241364 ms)
|
| 337 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483484 ms - Host latency: 0.552087 ms (enqueue 0.24527 ms)
|
| 338 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.552576 ms (enqueue 0.237128 ms)
|
| 339 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.552179 ms (enqueue 0.235059 ms)
|
| 340 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483606 ms - Host latency: 0.552203 ms (enqueue 0.235303 ms)
|
| 341 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483032 ms - Host latency: 0.551843 ms (enqueue 0.23479 ms)
|
| 342 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.552252 ms (enqueue 0.240533 ms)
|
| 343 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483545 ms - Host latency: 0.552014 ms (enqueue 0.237183 ms)
|
| 344 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483728 ms - Host latency: 0.552393 ms (enqueue 0.236047 ms)
|
| 345 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483411 ms - Host latency: 0.551794 ms (enqueue 0.235559 ms)
|
| 346 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483606 ms - Host latency: 0.552295 ms (enqueue 0.235779 ms)
|
| 347 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483484 ms - Host latency: 0.552344 ms (enqueue 0.234644 ms)
|
| 348 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483582 ms - Host latency: 0.5521 ms (enqueue 0.236621 ms)
|
| 349 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483411 ms - Host latency: 0.551562 ms (enqueue 0.253418 ms)
|
| 350 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48385 ms - Host latency: 0.552466 ms (enqueue 0.234717 ms)
|
| 351 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483057 ms - Host latency: 0.55155 ms (enqueue 0.234729 ms)
|
| 352 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48385 ms - Host latency: 0.552209 ms (enqueue 0.240186 ms)
|
| 353 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483484 ms - Host latency: 0.552393 ms (enqueue 0.234631 ms)
|
| 354 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483179 ms - Host latency: 0.55188 ms (enqueue 0.235986 ms)
|
| 355 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483789 ms - Host latency: 0.552698 ms (enqueue 0.234534 ms)
|
| 356 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483313 ms - Host latency: 0.551721 ms (enqueue 0.232874 ms)
|
| 357 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483533 ms - Host latency: 0.551282 ms (enqueue 0.231604 ms)
|
| 358 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.552393 ms (enqueue 0.2323 ms)
|
| 359 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483215 ms - Host latency: 0.551868 ms (enqueue 0.231165 ms)
|
| 360 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483411 ms - Host latency: 0.549634 ms (enqueue 0.232007 ms)
|
| 361 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483191 ms - Host latency: 0.549854 ms (enqueue 0.239929 ms)
|
| 362 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483911 ms - Host latency: 0.552112 ms (enqueue 0.243469 ms)
|
| 363 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483228 ms - Host latency: 0.551758 ms (enqueue 0.236511 ms)
|
| 364 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483899 ms - Host latency: 0.551526 ms (enqueue 0.234119 ms)
|
| 365 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483313 ms - Host latency: 0.551892 ms (enqueue 0.238037 ms)
|
| 366 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483655 ms - Host latency: 0.552295 ms (enqueue 0.234473 ms)
|
| 367 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.55249 ms (enqueue 0.234412 ms)
|
| 368 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483386 ms - Host latency: 0.551916 ms (enqueue 0.235449 ms)
|
| 369 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.552393 ms (enqueue 0.234949 ms)
|
| 370 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483557 ms - Host latency: 0.551587 ms (enqueue 0.234387 ms)
|
| 371 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483606 ms - Host latency: 0.551379 ms (enqueue 0.234155 ms)
|
| 372 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484119 ms - Host latency: 0.552893 ms (enqueue 0.236658 ms)
|
| 373 |
+
[01/20/2026-06:49:25] [I] ... Omitting 2159 lines
|
| 374 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483594 ms - Host latency: 0.552173 ms (enqueue 0.232959 ms)
|
| 375 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483936 ms - Host latency: 0.552417 ms (enqueue 0.23186 ms)
|
| 376 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482983 ms - Host latency: 0.551465 ms (enqueue 0.231934 ms)
|
| 377 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483154 ms - Host latency: 0.551904 ms (enqueue 0.233691 ms)
|
| 378 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482935 ms - Host latency: 0.551294 ms (enqueue 0.231836 ms)
|
| 379 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483374 ms - Host latency: 0.551147 ms (enqueue 0.234033 ms)
|
| 380 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482886 ms - Host latency: 0.550952 ms (enqueue 0.236523 ms)
|
| 381 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482959 ms - Host latency: 0.550757 ms (enqueue 0.23606 ms)
|
| 382 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.550269 ms (enqueue 0.237036 ms)
|
| 383 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482983 ms - Host latency: 0.551611 ms (enqueue 0.23418 ms)
|
| 384 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483057 ms - Host latency: 0.551587 ms (enqueue 0.232397 ms)
|
| 385 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.552002 ms (enqueue 0.23269 ms)
|
| 386 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.551953 ms (enqueue 0.232153 ms)
|
| 387 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483472 ms - Host latency: 0.552344 ms (enqueue 0.232812 ms)
|
| 388 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.552368 ms (enqueue 0.231909 ms)
|
| 389 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551709 ms (enqueue 0.2323 ms)
|
| 390 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48291 ms - Host latency: 0.551636 ms (enqueue 0.231763 ms)
|
| 391 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483447 ms - Host latency: 0.551733 ms (enqueue 0.231763 ms)
|
| 392 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483301 ms - Host latency: 0.551978 ms (enqueue 0.233228 ms)
|
| 393 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48269 ms - Host latency: 0.55061 ms (enqueue 0.232251 ms)
|
| 394 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483179 ms - Host latency: 0.551392 ms (enqueue 0.231348 ms)
|
| 395 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483276 ms - Host latency: 0.551855 ms (enqueue 0.232764 ms)
|
| 396 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483398 ms - Host latency: 0.551978 ms (enqueue 0.231543 ms)
|
| 397 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483008 ms - Host latency: 0.551636 ms (enqueue 0.23269 ms)
|
| 398 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483496 ms - Host latency: 0.551416 ms (enqueue 0.237427 ms)
|
| 399 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.552197 ms (enqueue 0.235425 ms)
|
| 400 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483423 ms - Host latency: 0.551831 ms (enqueue 0.232031 ms)
|
| 401 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551978 ms (enqueue 0.232275 ms)
|
| 402 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483276 ms - Host latency: 0.551074 ms (enqueue 0.232227 ms)
|
| 403 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.486475 ms - Host latency: 0.552222 ms (enqueue 0.370728 ms)
|
| 404 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.497339 ms - Host latency: 0.562793 ms (enqueue 0.53606 ms)
|
| 405 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.498169 ms - Host latency: 0.566064 ms (enqueue 0.535669 ms)
|
| 406 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.498364 ms - Host latency: 0.566431 ms (enqueue 0.535181 ms)
|
| 407 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.494067 ms - Host latency: 0.559985 ms (enqueue 0.497925 ms)
|
| 408 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483643 ms - Host latency: 0.552441 ms (enqueue 0.260767 ms)
|
| 409 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483521 ms - Host latency: 0.552441 ms (enqueue 0.255469 ms)
|
| 410 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483838 ms - Host latency: 0.552832 ms (enqueue 0.255225 ms)
|
| 411 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483398 ms - Host latency: 0.552368 ms (enqueue 0.255737 ms)
|
| 412 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483423 ms - Host latency: 0.552124 ms (enqueue 0.256323 ms)
|
| 413 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483447 ms - Host latency: 0.552124 ms (enqueue 0.255078 ms)
|
| 414 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483203 ms - Host latency: 0.552197 ms (enqueue 0.255786 ms)
|
| 415 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483105 ms - Host latency: 0.551855 ms (enqueue 0.256372 ms)
|
| 416 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483423 ms - Host latency: 0.552271 ms (enqueue 0.257324 ms)
|
| 417 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483447 ms - Host latency: 0.552222 ms (enqueue 0.255762 ms)
|
| 418 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483423 ms - Host latency: 0.552393 ms (enqueue 0.255542 ms)
|
| 419 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483521 ms - Host latency: 0.552222 ms (enqueue 0.255908 ms)
|
| 420 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483545 ms - Host latency: 0.552441 ms (enqueue 0.254736 ms)
|
| 421 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.552368 ms (enqueue 0.255713 ms)
|
| 422 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484009 ms - Host latency: 0.552246 ms (enqueue 0.261938 ms)
|
| 423 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483008 ms - Host latency: 0.550195 ms (enqueue 0.233862 ms)
|
| 424 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482861 ms - Host latency: 0.549609 ms (enqueue 0.244531 ms)
|
| 425 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483521 ms - Host latency: 0.552026 ms (enqueue 0.232007 ms)
|
| 426 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483203 ms - Host latency: 0.551611 ms (enqueue 0.231567 ms)
|
| 427 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483276 ms - Host latency: 0.551245 ms (enqueue 0.231787 ms)
|
| 428 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.55188 ms (enqueue 0.231323 ms)
|
| 429 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482959 ms - Host latency: 0.551489 ms (enqueue 0.231177 ms)
|
| 430 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483081 ms - Host latency: 0.551367 ms (enqueue 0.231055 ms)
|
| 431 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48269 ms - Host latency: 0.55105 ms (enqueue 0.232104 ms)
|
| 432 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483374 ms - Host latency: 0.551636 ms (enqueue 0.230835 ms)
|
| 433 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.551733 ms (enqueue 0.230811 ms)
|
| 434 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483252 ms - Host latency: 0.551855 ms (enqueue 0.231299 ms)
|
| 435 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551709 ms (enqueue 0.230811 ms)
|
| 436 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.55166 ms (enqueue 0.230811 ms)
|
| 437 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482764 ms - Host latency: 0.55127 ms (enqueue 0.230664 ms)
|
| 438 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551953 ms (enqueue 0.231787 ms)
|
| 439 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482935 ms - Host latency: 0.551221 ms (enqueue 0.231567 ms)
|
| 440 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483057 ms - Host latency: 0.551587 ms (enqueue 0.231274 ms)
|
| 441 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483057 ms - Host latency: 0.551953 ms (enqueue 0.231372 ms)
|
| 442 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48291 ms - Host latency: 0.551367 ms (enqueue 0.232544 ms)
|
| 443 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483618 ms - Host latency: 0.552002 ms (enqueue 0.232788 ms)
|
| 444 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483252 ms - Host latency: 0.550928 ms (enqueue 0.235498 ms)
|
| 445 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.551758 ms (enqueue 0.231592 ms)
|
| 446 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551367 ms (enqueue 0.233813 ms)
|
| 447 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551929 ms (enqueue 0.231641 ms)
|
| 448 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483228 ms - Host latency: 0.551733 ms (enqueue 0.232935 ms)
|
| 449 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483594 ms - Host latency: 0.551074 ms (enqueue 0.242847 ms)
|
| 450 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.550903 ms (enqueue 0.240112 ms)
|
| 451 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483276 ms - Host latency: 0.552173 ms (enqueue 0.227588 ms)
|
| 452 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482617 ms - Host latency: 0.551367 ms (enqueue 0.224902 ms)
|
| 453 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.55188 ms (enqueue 0.22522 ms)
|
| 454 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482788 ms - Host latency: 0.551318 ms (enqueue 0.225488 ms)
|
| 455 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482935 ms - Host latency: 0.55144 ms (enqueue 0.227734 ms)
|
| 456 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483398 ms - Host latency: 0.551929 ms (enqueue 0.224854 ms)
|
| 457 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48396 ms - Host latency: 0.552808 ms (enqueue 0.229346 ms)
|
| 458 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483398 ms - Host latency: 0.551123 ms (enqueue 0.232812 ms)
|
| 459 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483179 ms - Host latency: 0.548096 ms (enqueue 0.236792 ms)
|
| 460 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483179 ms - Host latency: 0.548218 ms (enqueue 0.239722 ms)
|
| 461 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.550293 ms (enqueue 0.241675 ms)
|
| 462 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483252 ms - Host latency: 0.551807 ms (enqueue 0.229492 ms)
|
| 463 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.551562 ms (enqueue 0.234375 ms)
|
| 464 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484082 ms - Host latency: 0.550879 ms (enqueue 0.289185 ms)
|
| 465 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.512231 ms - Host latency: 0.578516 ms (enqueue 0.556885 ms)
|
| 466 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.515991 ms - Host latency: 0.581445 ms (enqueue 0.531567 ms)
|
| 467 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483447 ms - Host latency: 0.552026 ms (enqueue 0.248267 ms)
|
| 468 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48313 ms - Host latency: 0.551929 ms (enqueue 0.253418 ms)
|
| 469 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483521 ms - Host latency: 0.552148 ms (enqueue 0.252075 ms)
|
| 470 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482983 ms - Host latency: 0.551611 ms (enqueue 0.23877 ms)
|
| 471 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483862 ms - Host latency: 0.552637 ms (enqueue 0.237354 ms)
|
| 472 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483081 ms - Host latency: 0.551514 ms (enqueue 0.235791 ms)
|
| 473 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.552368 ms (enqueue 0.239941 ms)
|
| 474 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483618 ms - Host latency: 0.552539 ms (enqueue 0.240186 ms)
|
| 475 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483301 ms - Host latency: 0.551855 ms (enqueue 0.239453 ms)
|
| 476 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484082 ms - Host latency: 0.550586 ms (enqueue 0.260913 ms)
|
| 477 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483643 ms - Host latency: 0.5521 ms (enqueue 0.228613 ms)
|
| 478 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483862 ms - Host latency: 0.552441 ms (enqueue 0.227979 ms)
|
| 479 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483911 ms - Host latency: 0.552222 ms (enqueue 0.231055 ms)
|
| 480 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.551978 ms (enqueue 0.23208 ms)
|
| 481 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483301 ms - Host latency: 0.55061 ms (enqueue 0.245142 ms)
|
| 482 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.549927 ms (enqueue 0.24729 ms)
|
| 483 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48313 ms - Host latency: 0.55166 ms (enqueue 0.232886 ms)
|
| 484 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483496 ms - Host latency: 0.550464 ms (enqueue 0.232764 ms)
|
| 485 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483398 ms - Host latency: 0.551904 ms (enqueue 0.232007 ms)
|
| 486 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.552026 ms (enqueue 0.231055 ms)
|
| 487 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484106 ms - Host latency: 0.55249 ms (enqueue 0.230444 ms)
|
| 488 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.551978 ms (enqueue 0.230396 ms)
|
| 489 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483301 ms - Host latency: 0.550806 ms (enqueue 0.231079 ms)
|
| 490 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.551953 ms (enqueue 0.237061 ms)
|
| 491 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483179 ms - Host latency: 0.550952 ms (enqueue 0.23689 ms)
|
| 492 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.552051 ms (enqueue 0.231372 ms)
|
| 493 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.552441 ms (enqueue 0.229468 ms)
|
| 494 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483545 ms - Host latency: 0.551807 ms (enqueue 0.230884 ms)
|
| 495 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551025 ms (enqueue 0.230371 ms)
|
| 496 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483862 ms - Host latency: 0.552368 ms (enqueue 0.229785 ms)
|
| 497 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483887 ms - Host latency: 0.552271 ms (enqueue 0.229199 ms)
|
| 498 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483716 ms - Host latency: 0.552515 ms (enqueue 0.22937 ms)
|
| 499 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483691 ms - Host latency: 0.552246 ms (enqueue 0.233618 ms)
|
| 500 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483521 ms - Host latency: 0.551831 ms (enqueue 0.230542 ms)
|
| 501 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483618 ms - Host latency: 0.552051 ms (enqueue 0.229321 ms)
|
| 502 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483643 ms - Host latency: 0.552002 ms (enqueue 0.228613 ms)
|
| 503 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483716 ms - Host latency: 0.551636 ms (enqueue 0.23689 ms)
|
| 504 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484033 ms - Host latency: 0.552441 ms (enqueue 0.230566 ms)
|
| 505 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483228 ms - Host latency: 0.551636 ms (enqueue 0.233374 ms)
|
| 506 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483838 ms - Host latency: 0.552588 ms (enqueue 0.230957 ms)
|
| 507 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483789 ms - Host latency: 0.552148 ms (enqueue 0.230054 ms)
|
| 508 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483398 ms - Host latency: 0.551562 ms (enqueue 0.240503 ms)
|
| 509 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483643 ms - Host latency: 0.552393 ms (enqueue 0.232324 ms)
|
| 510 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483936 ms - Host latency: 0.552368 ms (enqueue 0.232397 ms)
|
| 511 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483374 ms - Host latency: 0.551733 ms (enqueue 0.232275 ms)
|
| 512 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.552075 ms (enqueue 0.232227 ms)
|
| 513 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484009 ms - Host latency: 0.552661 ms (enqueue 0.23501 ms)
|
| 514 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483838 ms - Host latency: 0.552197 ms (enqueue 0.233667 ms)
|
| 515 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483911 ms - Host latency: 0.551685 ms (enqueue 0.23418 ms)
|
| 516 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484009 ms - Host latency: 0.552563 ms (enqueue 0.231787 ms)
|
| 517 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483667 ms - Host latency: 0.552417 ms (enqueue 0.235303 ms)
|
| 518 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483594 ms - Host latency: 0.552197 ms (enqueue 0.231763 ms)
|
| 519 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483105 ms - Host latency: 0.550122 ms (enqueue 0.233228 ms)
|
| 520 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.551245 ms (enqueue 0.235498 ms)
|
| 521 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483105 ms - Host latency: 0.551074 ms (enqueue 0.232593 ms)
|
| 522 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483618 ms - Host latency: 0.550659 ms (enqueue 0.237305 ms)
|
| 523 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.552344 ms (enqueue 0.235278 ms)
|
| 524 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483789 ms - Host latency: 0.552466 ms (enqueue 0.232227 ms)
|
| 525 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483862 ms - Host latency: 0.552319 ms (enqueue 0.232568 ms)
|
| 526 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48418 ms - Host latency: 0.552466 ms (enqueue 0.232178 ms)
|
| 527 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48418 ms - Host latency: 0.552612 ms (enqueue 0.233008 ms)
|
| 528 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.552075 ms (enqueue 0.231519 ms)
|
| 529 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484082 ms - Host latency: 0.55188 ms (enqueue 0.231323 ms)
|
| 530 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483154 ms - Host latency: 0.551416 ms (enqueue 0.233057 ms)
|
| 531 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48374 ms - Host latency: 0.552246 ms (enqueue 0.232788 ms)
|
| 532 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48374 ms - Host latency: 0.552197 ms (enqueue 0.230981 ms)
|
| 533 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483765 ms - Host latency: 0.551172 ms (enqueue 0.243335 ms)
|
| 534 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483887 ms - Host latency: 0.552441 ms (enqueue 0.23374 ms)
|
| 535 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.482593 ms - Host latency: 0.551001 ms (enqueue 0.234961 ms)
|
| 536 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483716 ms - Host latency: 0.552222 ms (enqueue 0.230176 ms)
|
| 537 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483618 ms - Host latency: 0.551978 ms (enqueue 0.232983 ms)
|
| 538 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483618 ms - Host latency: 0.552148 ms (enqueue 0.228857 ms)
|
| 539 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483521 ms - Host latency: 0.552026 ms (enqueue 0.23125 ms)
|
| 540 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483423 ms - Host latency: 0.551978 ms (enqueue 0.2323 ms)
|
| 541 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483301 ms - Host latency: 0.551587 ms (enqueue 0.230908 ms)
|
| 542 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483838 ms - Host latency: 0.552026 ms (enqueue 0.229028 ms)
|
| 543 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483984 ms - Host latency: 0.552612 ms (enqueue 0.229785 ms)
|
| 544 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483716 ms - Host latency: 0.552197 ms (enqueue 0.229224 ms)
|
| 545 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483521 ms - Host latency: 0.551367 ms (enqueue 0.231177 ms)
|
| 546 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483398 ms - Host latency: 0.550952 ms (enqueue 0.235059 ms)
|
| 547 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483838 ms - Host latency: 0.552246 ms (enqueue 0.230957 ms)
|
| 548 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483252 ms - Host latency: 0.551733 ms (enqueue 0.232104 ms)
|
| 549 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484082 ms - Host latency: 0.552905 ms (enqueue 0.230811 ms)
|
| 550 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483203 ms - Host latency: 0.551636 ms (enqueue 0.23623 ms)
|
| 551 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.551367 ms (enqueue 0.257251 ms)
|
| 552 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483374 ms - Host latency: 0.552246 ms (enqueue 0.245044 ms)
|
| 553 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483081 ms - Host latency: 0.551904 ms (enqueue 0.24541 ms)
|
| 554 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483545 ms - Host latency: 0.551538 ms (enqueue 0.241846 ms)
|
| 555 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.552075 ms (enqueue 0.242969 ms)
|
| 556 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483423 ms - Host latency: 0.551953 ms (enqueue 0.244067 ms)
|
| 557 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.551001 ms (enqueue 0.261768 ms)
|
| 558 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483179 ms - Host latency: 0.551587 ms (enqueue 0.231104 ms)
|
| 559 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484033 ms - Host latency: 0.552539 ms (enqueue 0.228149 ms)
|
| 560 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48335 ms - Host latency: 0.551807 ms (enqueue 0.22832 ms)
|
| 561 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.48291 ms - Host latency: 0.551343 ms (enqueue 0.231006 ms)
|
| 562 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483325 ms - Host latency: 0.552002 ms (enqueue 0.232666 ms)
|
| 563 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.484229 ms - Host latency: 0.5521 ms (enqueue 0.231201 ms)
|
| 564 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483447 ms - Host latency: 0.551538 ms (enqueue 0.230884 ms)
|
| 565 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483691 ms - Host latency: 0.551953 ms (enqueue 0.230859 ms)
|
| 566 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483154 ms - Host latency: 0.551685 ms (enqueue 0.230762 ms)
|
| 567 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483813 ms - Host latency: 0.552075 ms (enqueue 0.233179 ms)
|
| 568 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483936 ms - Host latency: 0.551465 ms (enqueue 0.233862 ms)
|
| 569 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483569 ms - Host latency: 0.552173 ms (enqueue 0.231665 ms)
|
| 570 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483716 ms - Host latency: 0.5521 ms (enqueue 0.231177 ms)
|
| 571 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483691 ms - Host latency: 0.551562 ms (enqueue 0.237524 ms)
|
| 572 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483545 ms - Host latency: 0.551855 ms (enqueue 0.231323 ms)
|
| 573 |
+
[01/20/2026-06:49:25] [I] Average on 10 runs - GPU latency: 0.483862 ms - Host latency: 0.552393 ms (enqueue 0.231226 ms)
|
| 574 |
+
[01/20/2026-06:49:25] [I]
|
| 575 |
+
[01/20/2026-06:49:25] [I] === Performance summary ===
|
| 576 |
+
[01/20/2026-06:49:25] [I] Throughput: 2052.01 qps
|
| 577 |
+
[01/20/2026-06:49:25] [I] Latency: min = 0.542114 ms, max = 0.611084 ms, mean = 0.551962 ms, median = 0.552002 ms, percentile(90%) = 0.553467 ms, percentile(95%) = 0.554169 ms, percentile(99%) = 0.569885 ms
|
| 578 |
+
[01/20/2026-06:49:25] [I] Enqueue Time: min = 0.223389 ms, max = 0.667725 ms, mean = 0.246327 ms, median = 0.233398 ms, percentile(90%) = 0.250854 ms, percentile(95%) = 0.268921 ms, percentile(99%) = 0.546997 ms
|
| 579 |
+
[01/20/2026-06:49:25] [I] H2D Latency: min = 0.0556641 ms, max = 0.0682983 ms, mean = 0.0632503 ms, median = 0.0639648 ms, percentile(90%) = 0.0644531 ms, percentile(95%) = 0.0645752 ms, percentile(99%) = 0.0646973 ms
|
| 580 |
+
[01/20/2026-06:49:25] [I] GPU Compute Time: min = 0.481201 ms, max = 0.54126 ms, mean = 0.484002 ms, median = 0.483398 ms, percentile(90%) = 0.484497 ms, percentile(95%) = 0.485352 ms, percentile(99%) = 0.503662 ms
|
| 581 |
+
[01/20/2026-06:49:25] [I] D2H Latency: min = 0.00415039 ms, max = 0.0131836 ms, mean = 0.00471637 ms, median = 0.00439453 ms, percentile(90%) = 0.00537109 ms, percentile(95%) = 0.00561523 ms, percentile(99%) = 0.0115967 ms
|
| 582 |
+
[01/20/2026-06:49:25] [I] Total Host Walltime: 3.00145 s
|
| 583 |
+
[01/20/2026-06:49:25] [I] Total GPU Compute Time: 2.98097 s
|
| 584 |
+
[01/20/2026-06:49:25] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 585 |
+
[01/20/2026-06:49:25] [I]
|
| 586 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_atto_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_atto_coco.engine --fp16 --optShapes=images:1x3x320x320,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_hgnetv2_atto_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1ef1245ba32de4b02f078f1cf0ae4f5e99b69c94604671340a131287d652b59
|
| 3 |
+
size 2277585
|
deimv2_hgnetv2_atto_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b793f073e068429c162f8e743dc10b5f93203c704a3617db56f9c8fa5f659f14
|
| 3 |
+
size 2247119
|
deimv2_hgnetv2_femto_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10baa468ea4a67cb47451ad255cd86eafe588b28dd19032e9d3998ef508aca8f
|
| 3 |
+
size 4683956
|
deimv2_hgnetv2_femto_coco.log
ADDED
|
@@ -0,0 +1,590 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_femto_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_femto_coco.engine --fp16 --optShapes=images:1x3x416x416,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-06:49:25] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-06:49:25] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-06:49:25] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-06:49:25] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-06:49:25] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-06:49:25] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-06:49:25] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-06:49:25] [I] Model: checkpoints/deimv2_hgnetv2_femto_coco.onnx
|
| 10 |
+
[01/20/2026-06:49:25] [I] Output:
|
| 11 |
+
[01/20/2026-06:49:25] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-06:49:25] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-06:49:25] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-06:49:25] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-06:49:25] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-06:49:25] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-06:49:25] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-06:49:25] [I] Calibration:
|
| 19 |
+
[01/20/2026-06:49:25] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-06:49:25] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-06:49:25] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-06:49:25] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-06:49:25] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-06:49:25] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-06:49:25] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-06:49:25] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-06:49:25] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-06:49:25] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-06:49:25] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-06:49:25] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-06:49:25] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-06:49:25] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-06:49:25] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-06:49:25] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-06:49:25] [I] Save engine: checkpoints/deimv2_hgnetv2_femto_coco.engine
|
| 36 |
+
[01/20/2026-06:49:25] [I] Load engine:
|
| 37 |
+
[01/20/2026-06:49:25] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-06:49:25] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-06:49:25] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-06:49:25] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-06:49:25] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-06:49:25] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-06:49:25] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-06:49:25] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-06:49:25] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-06:49:25] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-06:49:25] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-06:49:25] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-06:49:25] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-06:49:25] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-06:49:25] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-06:49:25] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-06:49:25] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-06:49:25] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-06:49:25] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-06:49:25] [I] Input build shape (profile 0): images=1x3x416x416+1x3x416x416+1x3x416x416
|
| 57 |
+
[01/20/2026-06:49:25] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-06:49:25] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-06:49:25] [I] === System Options ===
|
| 60 |
+
[01/20/2026-06:49:25] [I] Device: 0
|
| 61 |
+
[01/20/2026-06:49:25] [I] DLACore:
|
| 62 |
+
[01/20/2026-06:49:25] [I] Plugins:
|
| 63 |
+
[01/20/2026-06:49:25] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-06:49:25] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-06:49:25] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-06:49:25] [I]
|
| 67 |
+
[01/20/2026-06:49:25] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-06:49:25] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-06:49:25] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-06:49:25] [I] Input inference shape : images=1x3x416x416
|
| 71 |
+
[01/20/2026-06:49:25] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-06:49:25] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-06:49:25] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-06:49:25] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-06:49:25] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-06:49:25] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-06:49:25] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-06:49:25] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-06:49:25] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-06:49:25] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-06:49:25] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-06:49:25] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-06:49:25] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-06:49:25] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-06:49:25] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-06:49:25] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-06:49:25] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-06:49:25] [I] Inputs:
|
| 89 |
+
[01/20/2026-06:49:25] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-06:49:25] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-06:49:25] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-06:49:25] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-06:49:25] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-06:49:25] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-06:49:25] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-06:49:25] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-06:49:25] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-06:49:25] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-06:49:25] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-06:49:25] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-06:49:25] [I]
|
| 102 |
+
[01/20/2026-06:49:25] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-06:49:25] [I] Available Devices:
|
| 104 |
+
[01/20/2026-06:49:25] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-06:49:25] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-06:49:25] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-06:49:25] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-06:49:25] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-06:49:25] [I] SMs: 128
|
| 110 |
+
[01/20/2026-06:49:25] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-06:49:25] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-06:49:25] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-06:49:25] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-06:49:25] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-06:49:25] [I]
|
| 116 |
+
[01/20/2026-06:49:25] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-06:49:25] [I]
|
| 118 |
+
[01/20/2026-06:49:25] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-06:49:25] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-06:49:25] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 10547 (MiB)
|
| 121 |
+
[01/20/2026-06:49:25] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-06:49:25] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-06:49:25] [I] [TRT] Input filename: checkpoints/deimv2_hgnetv2_femto_coco.onnx
|
| 124 |
+
[01/20/2026-06:49:25] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-06:49:25] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-06:49:25] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-06:49:25] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-06:49:25] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-06:49:25] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-06:49:25] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-06:49:25] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-06:49:25] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-06:49:25] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-06:49:25] [I] Finished parsing network model. Parse time: 0.0239529
|
| 135 |
+
[01/20/2026-06:49:25] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x416x416 OPT=1x3x416x416 MAX=1x3x416x416
|
| 136 |
+
[01/20/2026-06:49:25] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-06:49:26] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +204, GPU +4, now: CPU 514, GPU 10551 (MiB)
|
| 138 |
+
[01/20/2026-06:49:26] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-06:49:26] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-06:49:26] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-06:50:32] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-06:50:59] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-06:51:00] [I] [TRT] Total Host Persistent Memory: 309840 bytes
|
| 144 |
+
[01/20/2026-06:51:00] [I] [TRT] Total Device Persistent Memory: 0 bytes
|
| 145 |
+
[01/20/2026-06:51:00] [I] [TRT] Max Scratch Memory: 791040 bytes
|
| 146 |
+
[01/20/2026-06:51:00] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 84 steps to complete.
|
| 147 |
+
[01/20/2026-06:51:00] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 1.76084ms to assign 10 blocks to 84 nodes requiring 5264384 bytes.
|
| 148 |
+
[01/20/2026-06:51:00] [I] [TRT] Total Activation Memory: 5264384 bytes
|
| 149 |
+
[01/20/2026-06:51:00] [I] [TRT] Total Weights Memory: 1977280 bytes
|
| 150 |
+
[01/20/2026-06:51:00] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-06:51:00] [I] [TRT] Engine generation completed in 94.1008 seconds.
|
| 152 |
+
[01/20/2026-06:51:00] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 0 MiB, GPU 11 MiB
|
| 153 |
+
[01/20/2026-06:51:00] [I] Created engine with size: 4.46697 MiB
|
| 154 |
+
[01/20/2026-06:51:00] [I] Engine built in 94.336 sec.
|
| 155 |
+
[01/20/2026-06:51:00] [I] [TRT] Loaded engine size: 4 MiB
|
| 156 |
+
[01/20/2026-06:51:00] [I] Engine deserialized in 0.00890684 sec.
|
| 157 |
+
[01/20/2026-06:51:00] [I] [TRT] [MS] Running engine with multi stream info
|
| 158 |
+
[01/20/2026-06:51:00] [I] [TRT] [MS] Number of aux streams is 2
|
| 159 |
+
[01/20/2026-06:51:00] [I] [TRT] [MS] Number of total worker streams is 3
|
| 160 |
+
[01/20/2026-06:51:00] [I] [TRT] [MS] The main stream provided by execute/enqueue calls is the first worker stream
|
| 161 |
+
[01/20/2026-06:51:00] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +5, now: CPU 0, GPU 6 (MiB)
|
| 162 |
+
[01/20/2026-06:51:00] [I] Setting persistentCacheLimit to 0 bytes.
|
| 163 |
+
[01/20/2026-06:51:00] [I] Created execution context with device memory size: 5.02051 MiB
|
| 164 |
+
[01/20/2026-06:51:00] [I] Using random values for input images
|
| 165 |
+
[01/20/2026-06:51:00] [I] Input binding for images with dimensions 1x3x416x416 is created.
|
| 166 |
+
[01/20/2026-06:51:00] [I] Using random values for input orig_target_sizes
|
| 167 |
+
[01/20/2026-06:51:00] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 168 |
+
[01/20/2026-06:51:00] [I] Output binding for labels with dimensions 1x300 is created.
|
| 169 |
+
[01/20/2026-06:51:00] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 170 |
+
[01/20/2026-06:51:00] [I] Output binding for scores with dimensions 1x300 is created.
|
| 171 |
+
[01/20/2026-06:51:00] [I] Starting inference
|
| 172 |
+
[01/20/2026-06:51:03] [I] Warmup completed 401 queries over 200 ms
|
| 173 |
+
[01/20/2026-06:51:03] [I] Timing trace has 6048 queries over 3.00125 s
|
| 174 |
+
[01/20/2026-06:51:03] [I]
|
| 175 |
+
[01/20/2026-06:51:03] [I] === Trace details ===
|
| 176 |
+
[01/20/2026-06:51:03] [I] Trace averages of 10 runs:
|
| 177 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493713 ms - Host latency: 0.602475 ms (enqueue 0.22467 ms)
|
| 178 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493674 ms - Host latency: 0.601965 ms (enqueue 0.226738 ms)
|
| 179 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493948 ms - Host latency: 0.601505 ms (enqueue 0.22599 ms)
|
| 180 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494272 ms - Host latency: 0.602023 ms (enqueue 0.224661 ms)
|
| 181 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493816 ms - Host latency: 0.601375 ms (enqueue 0.224545 ms)
|
| 182 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493932 ms - Host latency: 0.600436 ms (enqueue 0.226152 ms)
|
| 183 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493787 ms - Host latency: 0.599954 ms (enqueue 0.22563 ms)
|
| 184 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494127 ms - Host latency: 0.600565 ms (enqueue 0.225076 ms)
|
| 185 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493475 ms - Host latency: 0.600099 ms (enqueue 0.224956 ms)
|
| 186 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49416 ms - Host latency: 0.601909 ms (enqueue 0.224332 ms)
|
| 187 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494041 ms - Host latency: 0.601018 ms (enqueue 0.224092 ms)
|
| 188 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49381 ms - Host latency: 0.600476 ms (enqueue 0.221588 ms)
|
| 189 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493488 ms - Host latency: 0.60033 ms (enqueue 0.221014 ms)
|
| 190 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495172 ms - Host latency: 0.60372 ms (enqueue 0.222607 ms)
|
| 191 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494138 ms - Host latency: 0.601962 ms (enqueue 0.220322 ms)
|
| 192 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494278 ms - Host latency: 0.601825 ms (enqueue 0.220114 ms)
|
| 193 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494632 ms - Host latency: 0.603384 ms (enqueue 0.22037 ms)
|
| 194 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494324 ms - Host latency: 0.602039 ms (enqueue 0.219659 ms)
|
| 195 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494284 ms - Host latency: 0.602158 ms (enqueue 0.220499 ms)
|
| 196 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49639 ms - Host latency: 0.60632 ms (enqueue 0.223215 ms)
|
| 197 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496268 ms - Host latency: 0.604941 ms (enqueue 0.224225 ms)
|
| 198 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494122 ms - Host latency: 0.600092 ms (enqueue 0.220337 ms)
|
| 199 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494122 ms - Host latency: 0.600214 ms (enqueue 0.220486 ms)
|
| 200 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493692 ms - Host latency: 0.599573 ms (enqueue 0.220978 ms)
|
| 201 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496344 ms - Host latency: 0.605923 ms (enqueue 0.227701 ms)
|
| 202 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497476 ms - Host latency: 0.607159 ms (enqueue 0.22236 ms)
|
| 203 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494815 ms - Host latency: 0.602969 ms (enqueue 0.220789 ms)
|
| 204 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493729 ms - Host latency: 0.602499 ms (enqueue 0.219882 ms)
|
| 205 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493832 ms - Host latency: 0.602872 ms (enqueue 0.219992 ms)
|
| 206 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493594 ms - Host latency: 0.601752 ms (enqueue 0.220923 ms)
|
| 207 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496689 ms - Host latency: 0.606149 ms (enqueue 0.227429 ms)
|
| 208 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.498596 ms - Host latency: 0.610367 ms (enqueue 0.228427 ms)
|
| 209 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493329 ms - Host latency: 0.600107 ms (enqueue 0.220749 ms)
|
| 210 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493271 ms - Host latency: 0.599982 ms (enqueue 0.220676 ms)
|
| 211 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49342 ms - Host latency: 0.600366 ms (enqueue 0.220654 ms)
|
| 212 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493646 ms - Host latency: 0.600684 ms (enqueue 0.224249 ms)
|
| 213 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497488 ms - Host latency: 0.609607 ms (enqueue 0.232849 ms)
|
| 214 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496677 ms - Host latency: 0.606174 ms (enqueue 0.224466 ms)
|
| 215 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49541 ms - Host latency: 0.604309 ms (enqueue 0.223715 ms)
|
| 216 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495593 ms - Host latency: 0.603815 ms (enqueue 0.228955 ms)
|
| 217 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495853 ms - Host latency: 0.60336 ms (enqueue 0.226093 ms)
|
| 218 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.601923 ms (enqueue 0.219882 ms)
|
| 219 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494135 ms - Host latency: 0.602353 ms (enqueue 0.219855 ms)
|
| 220 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496155 ms - Host latency: 0.605099 ms (enqueue 0.220847 ms)
|
| 221 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494055 ms - Host latency: 0.60155 ms (enqueue 0.220575 ms)
|
| 222 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494397 ms - Host latency: 0.601715 ms (enqueue 0.219708 ms)
|
| 223 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494351 ms - Host latency: 0.602679 ms (enqueue 0.219952 ms)
|
| 224 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494 ms - Host latency: 0.601544 ms (enqueue 0.219888 ms)
|
| 225 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494604 ms - Host latency: 0.602249 ms (enqueue 0.228403 ms)
|
| 226 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495709 ms - Host latency: 0.6048 ms (enqueue 0.225446 ms)
|
| 227 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493951 ms - Host latency: 0.602054 ms (enqueue 0.222302 ms)
|
| 228 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494489 ms - Host latency: 0.602121 ms (enqueue 0.222778 ms)
|
| 229 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.601483 ms (enqueue 0.219873 ms)
|
| 230 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494205 ms - Host latency: 0.602539 ms (enqueue 0.219336 ms)
|
| 231 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493713 ms - Host latency: 0.602579 ms (enqueue 0.22048 ms)
|
| 232 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493512 ms - Host latency: 0.602786 ms (enqueue 0.220258 ms)
|
| 233 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493491 ms - Host latency: 0.60134 ms (enqueue 0.220413 ms)
|
| 234 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494028 ms - Host latency: 0.601407 ms (enqueue 0.219974 ms)
|
| 235 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494904 ms - Host latency: 0.601785 ms (enqueue 0.220477 ms)
|
| 236 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494489 ms - Host latency: 0.601022 ms (enqueue 0.223944 ms)
|
| 237 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49599 ms - Host latency: 0.604959 ms (enqueue 0.224094 ms)
|
| 238 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495425 ms - Host latency: 0.603757 ms (enqueue 0.224994 ms)
|
| 239 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493732 ms - Host latency: 0.601001 ms (enqueue 0.22247 ms)
|
| 240 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494202 ms - Host latency: 0.602478 ms (enqueue 0.22002 ms)
|
| 241 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493018 ms - Host latency: 0.601855 ms (enqueue 0.221094 ms)
|
| 242 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493573 ms - Host latency: 0.604138 ms (enqueue 0.224646 ms)
|
| 243 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496417 ms - Host latency: 0.60741 ms (enqueue 0.227393 ms)
|
| 244 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494031 ms - Host latency: 0.601801 ms (enqueue 0.223895 ms)
|
| 245 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494415 ms - Host latency: 0.602966 ms (enqueue 0.224164 ms)
|
| 246 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494794 ms - Host latency: 0.601752 ms (enqueue 0.223352 ms)
|
| 247 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494159 ms - Host latency: 0.600555 ms (enqueue 0.220776 ms)
|
| 248 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493787 ms - Host latency: 0.600421 ms (enqueue 0.219757 ms)
|
| 249 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494019 ms - Host latency: 0.601367 ms (enqueue 0.220331 ms)
|
| 250 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493542 ms - Host latency: 0.600128 ms (enqueue 0.222809 ms)
|
| 251 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493835 ms - Host latency: 0.601416 ms (enqueue 0.22049 ms)
|
| 252 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494232 ms - Host latency: 0.60318 ms (enqueue 0.219379 ms)
|
| 253 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49364 ms - Host latency: 0.601367 ms (enqueue 0.220728 ms)
|
| 254 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494269 ms - Host latency: 0.601837 ms (enqueue 0.219531 ms)
|
| 255 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49718 ms - Host latency: 0.604883 ms (enqueue 0.219659 ms)
|
| 256 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49895 ms - Host latency: 0.60741 ms (enqueue 0.238354 ms)
|
| 257 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494965 ms - Host latency: 0.603546 ms (enqueue 0.222833 ms)
|
| 258 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494202 ms - Host latency: 0.603516 ms (enqueue 0.224615 ms)
|
| 259 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494128 ms - Host latency: 0.601135 ms (enqueue 0.220868 ms)
|
| 260 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493524 ms - Host latency: 0.601086 ms (enqueue 0.220044 ms)
|
| 261 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494318 ms - Host latency: 0.602496 ms (enqueue 0.220001 ms)
|
| 262 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494244 ms - Host latency: 0.601459 ms (enqueue 0.219745 ms)
|
| 263 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493878 ms - Host latency: 0.600073 ms (enqueue 0.220013 ms)
|
| 264 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493329 ms - Host latency: 0.600189 ms (enqueue 0.222675 ms)
|
| 265 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494238 ms - Host latency: 0.601581 ms (enqueue 0.220093 ms)
|
| 266 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496289 ms - Host latency: 0.605029 ms (enqueue 0.233441 ms)
|
| 267 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495367 ms - Host latency: 0.604443 ms (enqueue 0.237946 ms)
|
| 268 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493817 ms - Host latency: 0.601965 ms (enqueue 0.224207 ms)
|
| 269 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494873 ms - Host latency: 0.603351 ms (enqueue 0.223273 ms)
|
| 270 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494287 ms - Host latency: 0.604059 ms (enqueue 0.221747 ms)
|
| 271 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494751 ms - Host latency: 0.602887 ms (enqueue 0.229193 ms)
|
| 272 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493616 ms - Host latency: 0.598981 ms (enqueue 0.224799 ms)
|
| 273 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493817 ms - Host latency: 0.600171 ms (enqueue 0.220319 ms)
|
| 274 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494098 ms - Host latency: 0.600378 ms (enqueue 0.215497 ms)
|
| 275 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494507 ms - Host latency: 0.601257 ms (enqueue 0.21239 ms)
|
| 276 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493732 ms - Host latency: 0.600256 ms (enqueue 0.218329 ms)
|
| 277 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494043 ms - Host latency: 0.600641 ms (enqueue 0.214203 ms)
|
| 278 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494244 ms - Host latency: 0.601959 ms (enqueue 0.216791 ms)
|
| 279 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494421 ms - Host latency: 0.603497 ms (enqueue 0.213782 ms)
|
| 280 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493372 ms - Host latency: 0.601575 ms (enqueue 0.215265 ms)
|
| 281 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494299 ms - Host latency: 0.601453 ms (enqueue 0.214655 ms)
|
| 282 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493958 ms - Host latency: 0.601648 ms (enqueue 0.216589 ms)
|
| 283 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49541 ms - Host latency: 0.603107 ms (enqueue 0.223853 ms)
|
| 284 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49436 ms - Host latency: 0.601178 ms (enqueue 0.219995 ms)
|
| 285 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495453 ms - Host latency: 0.602216 ms (enqueue 0.223596 ms)
|
| 286 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495563 ms - Host latency: 0.602448 ms (enqueue 0.222925 ms)
|
| 287 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494238 ms - Host latency: 0.602368 ms (enqueue 0.220435 ms)
|
| 288 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495843 ms - Host latency: 0.603827 ms (enqueue 0.225262 ms)
|
| 289 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493445 ms - Host latency: 0.601215 ms (enqueue 0.223602 ms)
|
| 290 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493579 ms - Host latency: 0.599945 ms (enqueue 0.223901 ms)
|
| 291 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496063 ms - Host latency: 0.605768 ms (enqueue 0.223047 ms)
|
| 292 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496051 ms - Host latency: 0.603522 ms (enqueue 0.226111 ms)
|
| 293 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494135 ms - Host latency: 0.601422 ms (enqueue 0.216083 ms)
|
| 294 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493738 ms - Host latency: 0.601086 ms (enqueue 0.216559 ms)
|
| 295 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494702 ms - Host latency: 0.60304 ms (enqueue 0.211713 ms)
|
| 296 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494269 ms - Host latency: 0.602368 ms (enqueue 0.212891 ms)
|
| 297 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494507 ms - Host latency: 0.601959 ms (enqueue 0.214362 ms)
|
| 298 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49447 ms - Host latency: 0.602783 ms (enqueue 0.233246 ms)
|
| 299 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493506 ms - Host latency: 0.600342 ms (enqueue 0.223248 ms)
|
| 300 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495233 ms - Host latency: 0.601819 ms (enqueue 0.22038 ms)
|
| 301 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.500342 ms - Host latency: 0.609271 ms (enqueue 0.245264 ms)
|
| 302 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493646 ms - Host latency: 0.600146 ms (enqueue 0.227319 ms)
|
| 303 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493713 ms - Host latency: 0.600928 ms (enqueue 0.227594 ms)
|
| 304 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494446 ms - Host latency: 0.603979 ms (enqueue 0.235321 ms)
|
| 305 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494012 ms - Host latency: 0.602008 ms (enqueue 0.228046 ms)
|
| 306 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494513 ms - Host latency: 0.602411 ms (enqueue 0.225488 ms)
|
| 307 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493811 ms - Host latency: 0.602985 ms (enqueue 0.227765 ms)
|
| 308 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494019 ms - Host latency: 0.602869 ms (enqueue 0.226105 ms)
|
| 309 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494049 ms - Host latency: 0.601562 ms (enqueue 0.225061 ms)
|
| 310 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494415 ms - Host latency: 0.600507 ms (enqueue 0.223889 ms)
|
| 311 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493585 ms - Host latency: 0.599835 ms (enqueue 0.223383 ms)
|
| 312 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49411 ms - Host latency: 0.600171 ms (enqueue 0.224017 ms)
|
| 313 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494232 ms - Host latency: 0.600134 ms (enqueue 0.224213 ms)
|
| 314 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493829 ms - Host latency: 0.600433 ms (enqueue 0.220465 ms)
|
| 315 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.60061 ms (enqueue 0.219989 ms)
|
| 316 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494141 ms - Host latency: 0.602783 ms (enqueue 0.220898 ms)
|
| 317 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49588 ms - Host latency: 0.605609 ms (enqueue 0.226678 ms)
|
| 318 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494482 ms - Host latency: 0.601971 ms (enqueue 0.223248 ms)
|
| 319 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.601202 ms (enqueue 0.222266 ms)
|
| 320 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496149 ms - Host latency: 0.603375 ms (enqueue 0.229279 ms)
|
| 321 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494177 ms - Host latency: 0.602002 ms (enqueue 0.223376 ms)
|
| 322 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493719 ms - Host latency: 0.601056 ms (enqueue 0.225677 ms)
|
| 323 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494263 ms - Host latency: 0.60144 ms (enqueue 0.220264 ms)
|
| 324 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494397 ms - Host latency: 0.601031 ms (enqueue 0.220398 ms)
|
| 325 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493488 ms - Host latency: 0.60061 ms (enqueue 0.221118 ms)
|
| 326 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495435 ms - Host latency: 0.603284 ms (enqueue 0.229395 ms)
|
| 327 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493829 ms - Host latency: 0.600861 ms (enqueue 0.229297 ms)
|
| 328 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497095 ms - Host latency: 0.605206 ms (enqueue 0.235187 ms)
|
| 329 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496075 ms - Host latency: 0.605542 ms (enqueue 0.225452 ms)
|
| 330 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495013 ms - Host latency: 0.603827 ms (enqueue 0.224585 ms)
|
| 331 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.498834 ms - Host latency: 0.610443 ms (enqueue 0.234204 ms)
|
| 332 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494153 ms - Host latency: 0.601788 ms (enqueue 0.226453 ms)
|
| 333 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493854 ms - Host latency: 0.602136 ms (enqueue 0.241925 ms)
|
| 334 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494086 ms - Host latency: 0.60282 ms (enqueue 0.234418 ms)
|
| 335 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494189 ms - Host latency: 0.602972 ms (enqueue 0.230725 ms)
|
| 336 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493579 ms - Host latency: 0.600769 ms (enqueue 0.229364 ms)
|
| 337 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493579 ms - Host latency: 0.600146 ms (enqueue 0.228516 ms)
|
| 338 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493683 ms - Host latency: 0.599933 ms (enqueue 0.230585 ms)
|
| 339 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493896 ms - Host latency: 0.6 ms (enqueue 0.229504 ms)
|
| 340 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493781 ms - Host latency: 0.600006 ms (enqueue 0.229608 ms)
|
| 341 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494 ms - Host latency: 0.600421 ms (enqueue 0.229547 ms)
|
| 342 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494348 ms - Host latency: 0.602118 ms (enqueue 0.228394 ms)
|
| 343 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494324 ms - Host latency: 0.602319 ms (enqueue 0.224506 ms)
|
| 344 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494104 ms - Host latency: 0.602905 ms (enqueue 0.226416 ms)
|
| 345 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493445 ms - Host latency: 0.602026 ms (enqueue 0.223474 ms)
|
| 346 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494922 ms - Host latency: 0.602661 ms (enqueue 0.227344 ms)
|
| 347 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.601538 ms (enqueue 0.224915 ms)
|
| 348 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493677 ms - Host latency: 0.599768 ms (enqueue 0.223962 ms)
|
| 349 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493616 ms - Host latency: 0.59989 ms (enqueue 0.223438 ms)
|
| 350 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.599878 ms (enqueue 0.223267 ms)
|
| 351 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493665 ms - Host latency: 0.600732 ms (enqueue 0.232874 ms)
|
| 352 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493579 ms - Host latency: 0.599963 ms (enqueue 0.228113 ms)
|
| 353 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494067 ms - Host latency: 0.601538 ms (enqueue 0.232483 ms)
|
| 354 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.602148 ms (enqueue 0.228455 ms)
|
| 355 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493311 ms - Host latency: 0.601831 ms (enqueue 0.227258 ms)
|
| 356 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494446 ms - Host latency: 0.602344 ms (enqueue 0.22782 ms)
|
| 357 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494275 ms - Host latency: 0.601636 ms (enqueue 0.22522 ms)
|
| 358 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496265 ms - Host latency: 0.606116 ms (enqueue 0.230444 ms)
|
| 359 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494141 ms - Host latency: 0.6021 ms (enqueue 0.223279 ms)
|
| 360 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495471 ms - Host latency: 0.604126 ms (enqueue 0.225171 ms)
|
| 361 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495422 ms - Host latency: 0.6047 ms (enqueue 0.226965 ms)
|
| 362 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494788 ms - Host latency: 0.604224 ms (enqueue 0.231531 ms)
|
| 363 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494031 ms - Host latency: 0.602039 ms (enqueue 0.223755 ms)
|
| 364 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.499304 ms - Host latency: 0.611072 ms (enqueue 0.235974 ms)
|
| 365 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494006 ms - Host latency: 0.600171 ms (enqueue 0.224304 ms)
|
| 366 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496667 ms - Host latency: 0.606262 ms (enqueue 0.230017 ms)
|
| 367 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494177 ms - Host latency: 0.600891 ms (enqueue 0.223279 ms)
|
| 368 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497729 ms - Host latency: 0.606555 ms (enqueue 0.232239 ms)
|
| 369 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493823 ms - Host latency: 0.601819 ms (enqueue 0.233752 ms)
|
| 370 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494836 ms - Host latency: 0.603833 ms (enqueue 0.23186 ms)
|
| 371 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494861 ms - Host latency: 0.6026 ms (enqueue 0.23634 ms)
|
| 372 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493994 ms - Host latency: 0.603503 ms (enqueue 0.229907 ms)
|
| 373 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494775 ms - Host latency: 0.604749 ms (enqueue 0.237427 ms)
|
| 374 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494019 ms - Host latency: 0.599585 ms (enqueue 0.22948 ms)
|
| 375 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494739 ms - Host latency: 0.602051 ms (enqueue 0.225 ms)
|
| 376 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493799 ms - Host latency: 0.600293 ms (enqueue 0.2354 ms)
|
| 377 |
+
[01/20/2026-06:51:03] [I] ... Omitting 2048 lines
|
| 378 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493848 ms - Host latency: 0.601001 ms (enqueue 0.235474 ms)
|
| 379 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494653 ms - Host latency: 0.600879 ms (enqueue 0.226221 ms)
|
| 380 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494385 ms - Host latency: 0.60144 ms (enqueue 0.223901 ms)
|
| 381 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.601099 ms (enqueue 0.223535 ms)
|
| 382 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493726 ms - Host latency: 0.602295 ms (enqueue 0.223389 ms)
|
| 383 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494263 ms - Host latency: 0.602954 ms (enqueue 0.222803 ms)
|
| 384 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494653 ms - Host latency: 0.602832 ms (enqueue 0.219751 ms)
|
| 385 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49668 ms - Host latency: 0.604419 ms (enqueue 0.216943 ms)
|
| 386 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493994 ms - Host latency: 0.603955 ms (enqueue 0.220166 ms)
|
| 387 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493604 ms - Host latency: 0.600952 ms (enqueue 0.219995 ms)
|
| 388 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494092 ms - Host latency: 0.600146 ms (enqueue 0.215601 ms)
|
| 389 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494727 ms - Host latency: 0.601172 ms (enqueue 0.221558 ms)
|
| 390 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493555 ms - Host latency: 0.600195 ms (enqueue 0.221289 ms)
|
| 391 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494214 ms - Host latency: 0.601099 ms (enqueue 0.215356 ms)
|
| 392 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494214 ms - Host latency: 0.60188 ms (enqueue 0.217334 ms)
|
| 393 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494067 ms - Host latency: 0.600855 ms (enqueue 0.21626 ms)
|
| 394 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49458 ms - Host latency: 0.601782 ms (enqueue 0.216284 ms)
|
| 395 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496826 ms - Host latency: 0.606616 ms (enqueue 0.231006 ms)
|
| 396 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496143 ms - Host latency: 0.60481 ms (enqueue 0.216406 ms)
|
| 397 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493408 ms - Host latency: 0.600928 ms (enqueue 0.217676 ms)
|
| 398 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497559 ms - Host latency: 0.60957 ms (enqueue 0.228052 ms)
|
| 399 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494238 ms - Host latency: 0.602734 ms (enqueue 0.216284 ms)
|
| 400 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.600952 ms (enqueue 0.215845 ms)
|
| 401 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494556 ms - Host latency: 0.602564 ms (enqueue 0.219385 ms)
|
| 402 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496851 ms - Host latency: 0.606616 ms (enqueue 0.222754 ms)
|
| 403 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49541 ms - Host latency: 0.601758 ms (enqueue 0.218604 ms)
|
| 404 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495972 ms - Host latency: 0.604199 ms (enqueue 0.220166 ms)
|
| 405 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493896 ms - Host latency: 0.599756 ms (enqueue 0.219336 ms)
|
| 406 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493799 ms - Host latency: 0.599829 ms (enqueue 0.216235 ms)
|
| 407 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494067 ms - Host latency: 0.60144 ms (enqueue 0.227148 ms)
|
| 408 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493994 ms - Host latency: 0.602441 ms (enqueue 0.223462 ms)
|
| 409 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494385 ms - Host latency: 0.602661 ms (enqueue 0.223682 ms)
|
| 410 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494189 ms - Host latency: 0.603442 ms (enqueue 0.224194 ms)
|
| 411 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493774 ms - Host latency: 0.603345 ms (enqueue 0.223291 ms)
|
| 412 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497363 ms - Host latency: 0.606152 ms (enqueue 0.223682 ms)
|
| 413 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497583 ms - Host latency: 0.61333 ms (enqueue 0.239209 ms)
|
| 414 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494751 ms - Host latency: 0.601318 ms (enqueue 0.230396 ms)
|
| 415 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49375 ms - Host latency: 0.600269 ms (enqueue 0.223242 ms)
|
| 416 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493677 ms - Host latency: 0.599438 ms (enqueue 0.223901 ms)
|
| 417 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494751 ms - Host latency: 0.601587 ms (enqueue 0.226929 ms)
|
| 418 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49707 ms - Host latency: 0.607642 ms (enqueue 0.229883 ms)
|
| 419 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493896 ms - Host latency: 0.601245 ms (enqueue 0.230859 ms)
|
| 420 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494214 ms - Host latency: 0.603491 ms (enqueue 0.223462 ms)
|
| 421 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494214 ms - Host latency: 0.603003 ms (enqueue 0.226831 ms)
|
| 422 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493799 ms - Host latency: 0.601392 ms (enqueue 0.223267 ms)
|
| 423 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493628 ms - Host latency: 0.601074 ms (enqueue 0.222412 ms)
|
| 424 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.602148 ms (enqueue 0.225317 ms)
|
| 425 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495361 ms - Host latency: 0.605469 ms (enqueue 0.226196 ms)
|
| 426 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49353 ms - Host latency: 0.600415 ms (enqueue 0.224146 ms)
|
| 427 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497559 ms - Host latency: 0.609106 ms (enqueue 0.23562 ms)
|
| 428 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493604 ms - Host latency: 0.600903 ms (enqueue 0.227832 ms)
|
| 429 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494556 ms - Host latency: 0.602539 ms (enqueue 0.223804 ms)
|
| 430 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493066 ms - Host latency: 0.600488 ms (enqueue 0.221973 ms)
|
| 431 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494287 ms - Host latency: 0.600635 ms (enqueue 0.223486 ms)
|
| 432 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.600024 ms (enqueue 0.222217 ms)
|
| 433 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493506 ms - Host latency: 0.601099 ms (enqueue 0.22229 ms)
|
| 434 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49375 ms - Host latency: 0.601318 ms (enqueue 0.22251 ms)
|
| 435 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495996 ms - Host latency: 0.605859 ms (enqueue 0.227197 ms)
|
| 436 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494214 ms - Host latency: 0.603076 ms (enqueue 0.224292 ms)
|
| 437 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497363 ms - Host latency: 0.608691 ms (enqueue 0.227612 ms)
|
| 438 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494043 ms - Host latency: 0.603882 ms (enqueue 0.224194 ms)
|
| 439 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494434 ms - Host latency: 0.602808 ms (enqueue 0.226221 ms)
|
| 440 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493823 ms - Host latency: 0.600977 ms (enqueue 0.230054 ms)
|
| 441 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493872 ms - Host latency: 0.600366 ms (enqueue 0.228491 ms)
|
| 442 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493872 ms - Host latency: 0.600317 ms (enqueue 0.228198 ms)
|
| 443 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495752 ms - Host latency: 0.601758 ms (enqueue 0.232642 ms)
|
| 444 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.600317 ms (enqueue 0.228857 ms)
|
| 445 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493896 ms - Host latency: 0.601562 ms (enqueue 0.22373 ms)
|
| 446 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494214 ms - Host latency: 0.602173 ms (enqueue 0.22356 ms)
|
| 447 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495996 ms - Host latency: 0.607007 ms (enqueue 0.227417 ms)
|
| 448 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494092 ms - Host latency: 0.60332 ms (enqueue 0.222803 ms)
|
| 449 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494189 ms - Host latency: 0.602856 ms (enqueue 0.221851 ms)
|
| 450 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494604 ms - Host latency: 0.602588 ms (enqueue 0.227002 ms)
|
| 451 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493774 ms - Host latency: 0.601392 ms (enqueue 0.222827 ms)
|
| 452 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493994 ms - Host latency: 0.599316 ms (enqueue 0.290698 ms)
|
| 453 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49502 ms - Host latency: 0.601099 ms (enqueue 0.338818 ms)
|
| 454 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495654 ms - Host latency: 0.596289 ms (enqueue 0.477686 ms)
|
| 455 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496289 ms - Host latency: 0.59917 ms (enqueue 0.399365 ms)
|
| 456 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496069 ms - Host latency: 0.604907 ms (enqueue 0.310937 ms)
|
| 457 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.597876 ms (enqueue 0.407813 ms)
|
| 458 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497021 ms - Host latency: 0.599561 ms (enqueue 0.437109 ms)
|
| 459 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494482 ms - Host latency: 0.60083 ms (enqueue 0.308228 ms)
|
| 460 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49375 ms - Host latency: 0.601489 ms (enqueue 0.259839 ms)
|
| 461 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.601709 ms (enqueue 0.242993 ms)
|
| 462 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494019 ms - Host latency: 0.602075 ms (enqueue 0.227612 ms)
|
| 463 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.602246 ms (enqueue 0.224414 ms)
|
| 464 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493579 ms - Host latency: 0.601855 ms (enqueue 0.224146 ms)
|
| 465 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493848 ms - Host latency: 0.600879 ms (enqueue 0.222681 ms)
|
| 466 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495508 ms - Host latency: 0.604883 ms (enqueue 0.229321 ms)
|
| 467 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496094 ms - Host latency: 0.606006 ms (enqueue 0.227173 ms)
|
| 468 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493823 ms - Host latency: 0.600391 ms (enqueue 0.231445 ms)
|
| 469 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49624 ms - Host latency: 0.604639 ms (enqueue 0.236548 ms)
|
| 470 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495337 ms - Host latency: 0.601538 ms (enqueue 0.227832 ms)
|
| 471 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497168 ms - Host latency: 0.60686 ms (enqueue 0.234888 ms)
|
| 472 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495093 ms - Host latency: 0.602564 ms (enqueue 0.230103 ms)
|
| 473 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494971 ms - Host latency: 0.602832 ms (enqueue 0.23103 ms)
|
| 474 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494482 ms - Host latency: 0.602319 ms (enqueue 0.223242 ms)
|
| 475 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494287 ms - Host latency: 0.603003 ms (enqueue 0.223267 ms)
|
| 476 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495215 ms - Host latency: 0.604346 ms (enqueue 0.220117 ms)
|
| 477 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.601855 ms (enqueue 0.215479 ms)
|
| 478 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.599878 ms (enqueue 0.216772 ms)
|
| 479 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494238 ms - Host latency: 0.600415 ms (enqueue 0.215918 ms)
|
| 480 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494775 ms - Host latency: 0.600561 ms (enqueue 0.221875 ms)
|
| 481 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497363 ms - Host latency: 0.60708 ms (enqueue 0.232861 ms)
|
| 482 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.501758 ms - Host latency: 0.615112 ms (enqueue 0.246387 ms)
|
| 483 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496143 ms - Host latency: 0.605835 ms (enqueue 0.228711 ms)
|
| 484 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494092 ms - Host latency: 0.602466 ms (enqueue 0.227441 ms)
|
| 485 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.502075 ms - Host latency: 0.617383 ms (enqueue 0.234399 ms)
|
| 486 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495264 ms - Host latency: 0.608716 ms (enqueue 0.235986 ms)
|
| 487 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497485 ms - Host latency: 0.608301 ms (enqueue 0.232227 ms)
|
| 488 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494092 ms - Host latency: 0.601587 ms (enqueue 0.228467 ms)
|
| 489 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49873 ms - Host latency: 0.606494 ms (enqueue 0.245557 ms)
|
| 490 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496362 ms - Host latency: 0.603223 ms (enqueue 0.234399 ms)
|
| 491 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494238 ms - Host latency: 0.60083 ms (enqueue 0.232739 ms)
|
| 492 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494263 ms - Host latency: 0.601318 ms (enqueue 0.228345 ms)
|
| 493 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.498145 ms - Host latency: 0.605811 ms (enqueue 0.232129 ms)
|
| 494 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493555 ms - Host latency: 0.601074 ms (enqueue 0.227588 ms)
|
| 495 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493848 ms - Host latency: 0.601685 ms (enqueue 0.228394 ms)
|
| 496 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494971 ms - Host latency: 0.60188 ms (enqueue 0.229395 ms)
|
| 497 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494385 ms - Host latency: 0.602393 ms (enqueue 0.254346 ms)
|
| 498 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494702 ms - Host latency: 0.601587 ms (enqueue 0.279004 ms)
|
| 499 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494482 ms - Host latency: 0.599048 ms (enqueue 0.375024 ms)
|
| 500 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494067 ms - Host latency: 0.596021 ms (enqueue 0.451318 ms)
|
| 501 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49458 ms - Host latency: 0.599219 ms (enqueue 0.348462 ms)
|
| 502 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493774 ms - Host latency: 0.601538 ms (enqueue 0.268945 ms)
|
| 503 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494067 ms - Host latency: 0.601855 ms (enqueue 0.245825 ms)
|
| 504 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.601733 ms (enqueue 0.239233 ms)
|
| 505 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493799 ms - Host latency: 0.601172 ms (enqueue 0.228442 ms)
|
| 506 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494043 ms - Host latency: 0.600195 ms (enqueue 0.229663 ms)
|
| 507 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493994 ms - Host latency: 0.600415 ms (enqueue 0.232153 ms)
|
| 508 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.600342 ms (enqueue 0.227881 ms)
|
| 509 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493555 ms - Host latency: 0.600391 ms (enqueue 0.22854 ms)
|
| 510 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.601562 ms (enqueue 0.227783 ms)
|
| 511 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494043 ms - Host latency: 0.601562 ms (enqueue 0.227661 ms)
|
| 512 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49436 ms - Host latency: 0.603345 ms (enqueue 0.228345 ms)
|
| 513 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493774 ms - Host latency: 0.604028 ms (enqueue 0.22688 ms)
|
| 514 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494263 ms - Host latency: 0.603125 ms (enqueue 0.236157 ms)
|
| 515 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.601685 ms (enqueue 0.252124 ms)
|
| 516 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496631 ms - Host latency: 0.602197 ms (enqueue 0.341895 ms)
|
| 517 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496509 ms - Host latency: 0.596924 ms (enqueue 0.484131 ms)
|
| 518 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496411 ms - Host latency: 0.597852 ms (enqueue 0.396777 ms)
|
| 519 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494409 ms - Host latency: 0.600757 ms (enqueue 0.290869 ms)
|
| 520 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493774 ms - Host latency: 0.600561 ms (enqueue 0.253662 ms)
|
| 521 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494165 ms - Host latency: 0.600732 ms (enqueue 0.237451 ms)
|
| 522 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494556 ms - Host latency: 0.603662 ms (enqueue 0.231689 ms)
|
| 523 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.602441 ms (enqueue 0.229712 ms)
|
| 524 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494336 ms - Host latency: 0.602051 ms (enqueue 0.229053 ms)
|
| 525 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494678 ms - Host latency: 0.602148 ms (enqueue 0.228125 ms)
|
| 526 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494995 ms - Host latency: 0.604492 ms (enqueue 0.237085 ms)
|
| 527 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494067 ms - Host latency: 0.601953 ms (enqueue 0.229297 ms)
|
| 528 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494312 ms - Host latency: 0.601514 ms (enqueue 0.229004 ms)
|
| 529 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493311 ms - Host latency: 0.600073 ms (enqueue 0.229053 ms)
|
| 530 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493701 ms - Host latency: 0.600537 ms (enqueue 0.228418 ms)
|
| 531 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494336 ms - Host latency: 0.601929 ms (enqueue 0.232886 ms)
|
| 532 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493799 ms - Host latency: 0.601074 ms (enqueue 0.229224 ms)
|
| 533 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493823 ms - Host latency: 0.600586 ms (enqueue 0.229346 ms)
|
| 534 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494214 ms - Host latency: 0.600439 ms (enqueue 0.231592 ms)
|
| 535 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494238 ms - Host latency: 0.601294 ms (enqueue 0.229736 ms)
|
| 536 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493799 ms - Host latency: 0.601489 ms (enqueue 0.229639 ms)
|
| 537 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493652 ms - Host latency: 0.601172 ms (enqueue 0.22981 ms)
|
| 538 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493872 ms - Host latency: 0.60188 ms (enqueue 0.22644 ms)
|
| 539 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494751 ms - Host latency: 0.602954 ms (enqueue 0.231665 ms)
|
| 540 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495093 ms - Host latency: 0.604932 ms (enqueue 0.233252 ms)
|
| 541 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495117 ms - Host latency: 0.603516 ms (enqueue 0.224609 ms)
|
| 542 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494165 ms - Host latency: 0.601807 ms (enqueue 0.233594 ms)
|
| 543 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49397 ms - Host latency: 0.600122 ms (enqueue 0.227832 ms)
|
| 544 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494043 ms - Host latency: 0.600513 ms (enqueue 0.225317 ms)
|
| 545 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494946 ms - Host latency: 0.601465 ms (enqueue 0.230054 ms)
|
| 546 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493579 ms - Host latency: 0.600708 ms (enqueue 0.227466 ms)
|
| 547 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494385 ms - Host latency: 0.601001 ms (enqueue 0.232275 ms)
|
| 548 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495215 ms - Host latency: 0.603931 ms (enqueue 0.229614 ms)
|
| 549 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496899 ms - Host latency: 0.607104 ms (enqueue 0.225122 ms)
|
| 550 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493921 ms - Host latency: 0.603247 ms (enqueue 0.224292 ms)
|
| 551 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496704 ms - Host latency: 0.608008 ms (enqueue 0.227515 ms)
|
| 552 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494312 ms - Host latency: 0.602051 ms (enqueue 0.224756 ms)
|
| 553 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497314 ms - Host latency: 0.608643 ms (enqueue 0.2354 ms)
|
| 554 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493896 ms - Host latency: 0.600244 ms (enqueue 0.229907 ms)
|
| 555 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494116 ms - Host latency: 0.600537 ms (enqueue 0.226489 ms)
|
| 556 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495044 ms - Host latency: 0.603833 ms (enqueue 0.233105 ms)
|
| 557 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493726 ms - Host latency: 0.600659 ms (enqueue 0.225659 ms)
|
| 558 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495898 ms - Host latency: 0.605078 ms (enqueue 0.236572 ms)
|
| 559 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.496411 ms - Host latency: 0.606226 ms (enqueue 0.229077 ms)
|
| 560 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493579 ms - Host latency: 0.601611 ms (enqueue 0.222388 ms)
|
| 561 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494189 ms - Host latency: 0.602441 ms (enqueue 0.223755 ms)
|
| 562 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495215 ms - Host latency: 0.602979 ms (enqueue 0.223853 ms)
|
| 563 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494336 ms - Host latency: 0.601978 ms (enqueue 0.229736 ms)
|
| 564 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497437 ms - Host latency: 0.608643 ms (enqueue 0.229053 ms)
|
| 565 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49353 ms - Host latency: 0.601489 ms (enqueue 0.225 ms)
|
| 566 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.495239 ms - Host latency: 0.606201 ms (enqueue 0.233594 ms)
|
| 567 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494238 ms - Host latency: 0.601367 ms (enqueue 0.227808 ms)
|
| 568 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.499609 ms - Host latency: 0.609863 ms (enqueue 0.244409 ms)
|
| 569 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.494263 ms - Host latency: 0.602612 ms (enqueue 0.230078 ms)
|
| 570 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493921 ms - Host latency: 0.600561 ms (enqueue 0.227368 ms)
|
| 571 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493237 ms - Host latency: 0.599243 ms (enqueue 0.227051 ms)
|
| 572 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.49436 ms - Host latency: 0.600195 ms (enqueue 0.226343 ms)
|
| 573 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493994 ms - Host latency: 0.600635 ms (enqueue 0.223096 ms)
|
| 574 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.498315 ms - Host latency: 0.609155 ms (enqueue 0.235254 ms)
|
| 575 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493945 ms - Host latency: 0.601587 ms (enqueue 0.223877 ms)
|
| 576 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.493628 ms - Host latency: 0.60144 ms (enqueue 0.223169 ms)
|
| 577 |
+
[01/20/2026-06:51:03] [I] Average on 10 runs - GPU latency: 0.497119 ms - Host latency: 0.607739 ms (enqueue 0.232178 ms)
|
| 578 |
+
[01/20/2026-06:51:03] [I]
|
| 579 |
+
[01/20/2026-06:51:03] [I] === Performance summary ===
|
| 580 |
+
[01/20/2026-06:51:03] [I] Throughput: 2015.16 qps
|
| 581 |
+
[01/20/2026-06:51:03] [I] Latency: min = 0.59082 ms, max = 0.656494 ms, mean = 0.602234 ms, median = 0.601349 ms, percentile(90%) = 0.604248 ms, percentile(95%) = 0.607422 ms, percentile(99%) = 0.625244 ms
|
| 582 |
+
[01/20/2026-06:51:03] [I] Enqueue Time: min = 0.209778 ms, max = 0.529785 ms, mean = 0.229519 ms, median = 0.223633 ms, percentile(90%) = 0.239868 ms, percentile(95%) = 0.25647 ms, percentile(99%) = 0.393066 ms
|
| 583 |
+
[01/20/2026-06:51:03] [I] H2D Latency: min = 0.0942383 ms, max = 0.155121 ms, mean = 0.103044 ms, median = 0.102768 ms, percentile(90%) = 0.104492 ms, percentile(95%) = 0.105225 ms, percentile(99%) = 0.121338 ms
|
| 584 |
+
[01/20/2026-06:51:03] [I] GPU Compute Time: min = 0.490479 ms, max = 0.550049 ms, mean = 0.494521 ms, median = 0.493774 ms, percentile(90%) = 0.495605 ms, percentile(95%) = 0.496643 ms, percentile(99%) = 0.513916 ms
|
| 585 |
+
[01/20/2026-06:51:03] [I] D2H Latency: min = 0.00415039 ms, max = 0.0305176 ms, mean = 0.00466856 ms, median = 0.00440979 ms, percentile(90%) = 0.00537109 ms, percentile(95%) = 0.00561523 ms, percentile(99%) = 0.00598145 ms
|
| 586 |
+
[01/20/2026-06:51:03] [I] Total Host Walltime: 3.00125 s
|
| 587 |
+
[01/20/2026-06:51:03] [I] Total GPU Compute Time: 2.99086 s
|
| 588 |
+
[01/20/2026-06:51:03] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 589 |
+
[01/20/2026-06:51:03] [I]
|
| 590 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_femto_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_femto_coco.engine --fp16 --optShapes=images:1x3x416x416,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_hgnetv2_femto_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dad4790924ccdb8186ed3bc81ad5721e9d0decdf93bbbd292df2824cdafa9bf
|
| 3 |
+
size 4144818
|
deimv2_hgnetv2_femto_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1eebcaeacd1d7f1547b4fbc87fa690dec9eaeb9faa2770c93644e79ac7a666a7
|
| 3 |
+
size 4163663
|
deimv2_hgnetv2_n_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3279909fc0123dbb9a555ed0e697814d903c1d2b7f5c70041b0a296d642096ab
|
| 3 |
+
size 10641764
|
deimv2_hgnetv2_n_coco.log
ADDED
|
@@ -0,0 +1,590 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_n_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_n_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-06:53:03] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-06:53:03] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-06:53:03] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-06:53:03] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-06:53:03] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-06:53:03] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-06:53:03] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-06:53:03] [I] Model: checkpoints/deimv2_hgnetv2_n_coco.onnx
|
| 10 |
+
[01/20/2026-06:53:03] [I] Output:
|
| 11 |
+
[01/20/2026-06:53:03] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-06:53:03] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-06:53:03] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-06:53:03] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-06:53:03] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-06:53:03] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-06:53:03] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-06:53:03] [I] Calibration:
|
| 19 |
+
[01/20/2026-06:53:03] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-06:53:03] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-06:53:03] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-06:53:03] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-06:53:03] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-06:53:03] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-06:53:03] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-06:53:03] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-06:53:03] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-06:53:03] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-06:53:03] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-06:53:03] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-06:53:03] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-06:53:03] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-06:53:03] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-06:53:03] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-06:53:03] [I] Save engine: checkpoints/deimv2_hgnetv2_n_coco.engine
|
| 36 |
+
[01/20/2026-06:53:03] [I] Load engine:
|
| 37 |
+
[01/20/2026-06:53:03] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-06:53:03] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-06:53:03] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-06:53:03] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-06:53:03] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-06:53:03] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-06:53:03] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-06:53:03] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-06:53:03] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-06:53:03] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-06:53:03] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-06:53:03] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-06:53:03] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-06:53:03] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-06:53:03] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-06:53:03] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-06:53:03] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-06:53:03] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-06:53:03] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-06:53:03] [I] Input build shape (profile 0): images=1x3x640x640+1x3x640x640+1x3x640x640
|
| 57 |
+
[01/20/2026-06:53:03] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-06:53:03] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-06:53:03] [I] === System Options ===
|
| 60 |
+
[01/20/2026-06:53:03] [I] Device: 0
|
| 61 |
+
[01/20/2026-06:53:03] [I] DLACore:
|
| 62 |
+
[01/20/2026-06:53:03] [I] Plugins:
|
| 63 |
+
[01/20/2026-06:53:03] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-06:53:03] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-06:53:03] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-06:53:03] [I]
|
| 67 |
+
[01/20/2026-06:53:03] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-06:53:03] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-06:53:03] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-06:53:03] [I] Input inference shape : images=1x3x640x640
|
| 71 |
+
[01/20/2026-06:53:03] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-06:53:03] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-06:53:03] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-06:53:03] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-06:53:03] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-06:53:03] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-06:53:03] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-06:53:03] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-06:53:03] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-06:53:03] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-06:53:03] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-06:53:03] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-06:53:03] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-06:53:03] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-06:53:03] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-06:53:03] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-06:53:03] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-06:53:03] [I] Inputs:
|
| 89 |
+
[01/20/2026-06:53:03] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-06:53:03] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-06:53:03] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-06:53:03] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-06:53:03] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-06:53:03] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-06:53:03] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-06:53:03] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-06:53:03] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-06:53:03] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-06:53:03] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-06:53:03] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-06:53:03] [I]
|
| 102 |
+
[01/20/2026-06:53:03] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-06:53:03] [I] Available Devices:
|
| 104 |
+
[01/20/2026-06:53:03] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-06:53:03] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-06:53:03] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-06:53:03] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-06:53:03] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-06:53:03] [I] SMs: 128
|
| 110 |
+
[01/20/2026-06:53:03] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-06:53:03] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-06:53:03] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-06:53:03] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-06:53:03] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-06:53:03] [I]
|
| 116 |
+
[01/20/2026-06:53:03] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-06:53:03] [I]
|
| 118 |
+
[01/20/2026-06:53:03] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-06:53:03] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-06:53:03] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 10549 (MiB)
|
| 121 |
+
[01/20/2026-06:53:03] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-06:53:03] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-06:53:03] [I] [TRT] Input filename: checkpoints/deimv2_hgnetv2_n_coco.onnx
|
| 124 |
+
[01/20/2026-06:53:03] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-06:53:03] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-06:53:03] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-06:53:03] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-06:53:03] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-06:53:03] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-06:53:03] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-06:53:03] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-06:53:03] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-06:53:03] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-06:53:03] [I] Finished parsing network model. Parse time: 0.0334079
|
| 135 |
+
[01/20/2026-06:53:03] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x640x640 OPT=1x3x640x640 MAX=1x3x640x640
|
| 136 |
+
[01/20/2026-06:53:03] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-06:53:04] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +205, GPU +4, now: CPU 527, GPU 10553 (MiB)
|
| 138 |
+
[01/20/2026-06:53:04] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-06:53:04] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-06:53:04] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-06:53:56] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-06:55:03] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-06:55:04] [I] [TRT] Total Host Persistent Memory: 402368 bytes
|
| 144 |
+
[01/20/2026-06:55:04] [I] [TRT] Total Device Persistent Memory: 1024 bytes
|
| 145 |
+
[01/20/2026-06:55:04] [I] [TRT] Max Scratch Memory: 3229184 bytes
|
| 146 |
+
[01/20/2026-06:55:04] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 99 steps to complete.
|
| 147 |
+
[01/20/2026-06:55:04] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 1.87911ms to assign 10 blocks to 99 nodes requiring 12494336 bytes.
|
| 148 |
+
[01/20/2026-06:55:04] [I] [TRT] Total Activation Memory: 12494336 bytes
|
| 149 |
+
[01/20/2026-06:55:04] [I] [TRT] Total Weights Memory: 7322496 bytes
|
| 150 |
+
[01/20/2026-06:55:05] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-06:55:05] [I] [TRT] Engine generation completed in 120.832 seconds.
|
| 152 |
+
[01/20/2026-06:55:05] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 2 MiB, GPU 43 MiB
|
| 153 |
+
[01/20/2026-06:55:05] [I] Created engine with size: 10.1488 MiB
|
| 154 |
+
[01/20/2026-06:55:05] [I] Engine built in 121.083 sec.
|
| 155 |
+
[01/20/2026-06:55:05] [I] [TRT] Loaded engine size: 10 MiB
|
| 156 |
+
[01/20/2026-06:55:05] [I] Engine deserialized in 0.0130846 sec.
|
| 157 |
+
[01/20/2026-06:55:05] [I] [TRT] [MS] Running engine with multi stream info
|
| 158 |
+
[01/20/2026-06:55:05] [I] [TRT] [MS] Number of aux streams is 2
|
| 159 |
+
[01/20/2026-06:55:05] [I] [TRT] [MS] Number of total worker streams is 3
|
| 160 |
+
[01/20/2026-06:55:05] [I] [TRT] [MS] The main stream provided by execute/enqueue calls is the first worker stream
|
| 161 |
+
[01/20/2026-06:55:05] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +12, now: CPU 0, GPU 18 (MiB)
|
| 162 |
+
[01/20/2026-06:55:05] [I] Setting persistentCacheLimit to 0 bytes.
|
| 163 |
+
[01/20/2026-06:55:05] [I] Created execution context with device memory size: 11.9155 MiB
|
| 164 |
+
[01/20/2026-06:55:05] [I] Using random values for input images
|
| 165 |
+
[01/20/2026-06:55:05] [I] Input binding for images with dimensions 1x3x640x640 is created.
|
| 166 |
+
[01/20/2026-06:55:05] [I] Using random values for input orig_target_sizes
|
| 167 |
+
[01/20/2026-06:55:05] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 168 |
+
[01/20/2026-06:55:05] [I] Output binding for labels with dimensions 1x300 is created.
|
| 169 |
+
[01/20/2026-06:55:05] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 170 |
+
[01/20/2026-06:55:05] [I] Output binding for scores with dimensions 1x300 is created.
|
| 171 |
+
[01/20/2026-06:55:05] [I] Starting inference
|
| 172 |
+
[01/20/2026-06:55:08] [I] Warmup completed 287 queries over 200 ms
|
| 173 |
+
[01/20/2026-06:55:08] [I] Timing trace has 4296 queries over 3.00228 s
|
| 174 |
+
[01/20/2026-06:55:08] [I]
|
| 175 |
+
[01/20/2026-06:55:08] [I] === Trace details ===
|
| 176 |
+
[01/20/2026-06:55:08] [I] Trace averages of 10 runs:
|
| 177 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690395 ms - Host latency: 0.919766 ms (enqueue 0.307228 ms)
|
| 178 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69028 ms - Host latency: 0.919734 ms (enqueue 0.309088 ms)
|
| 179 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689502 ms - Host latency: 0.917297 ms (enqueue 0.30425 ms)
|
| 180 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.705373 ms - Host latency: 0.93367 ms (enqueue 0.6026 ms)
|
| 181 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.701196 ms - Host latency: 0.928915 ms (enqueue 0.492586 ms)
|
| 182 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689682 ms - Host latency: 0.917651 ms (enqueue 0.334079 ms)
|
| 183 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689722 ms - Host latency: 0.916315 ms (enqueue 0.334933 ms)
|
| 184 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689667 ms - Host latency: 0.916776 ms (enqueue 0.318919 ms)
|
| 185 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691223 ms - Host latency: 0.918683 ms (enqueue 0.329697 ms)
|
| 186 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690964 ms - Host latency: 0.918402 ms (enqueue 0.306866 ms)
|
| 187 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690405 ms - Host latency: 0.919589 ms (enqueue 0.325079 ms)
|
| 188 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.693604 ms - Host latency: 0.921353 ms (enqueue 0.449445 ms)
|
| 189 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.700851 ms - Host latency: 0.928735 ms (enqueue 0.477618 ms)
|
| 190 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.703339 ms - Host latency: 0.931647 ms (enqueue 0.752518 ms)
|
| 191 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.699008 ms - Host latency: 0.925354 ms (enqueue 0.605197 ms)
|
| 192 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.699902 ms - Host latency: 0.926758 ms (enqueue 0.633948 ms)
|
| 193 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.70397 ms - Host latency: 0.931485 ms (enqueue 0.715582 ms)
|
| 194 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.71312 ms - Host latency: 0.939001 ms (enqueue 0.701132 ms)
|
| 195 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.704895 ms - Host latency: 0.931793 ms (enqueue 0.527603 ms)
|
| 196 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690387 ms - Host latency: 0.917697 ms (enqueue 0.326929 ms)
|
| 197 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69017 ms - Host latency: 0.918271 ms (enqueue 0.328525 ms)
|
| 198 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691342 ms - Host latency: 0.919809 ms (enqueue 0.307928 ms)
|
| 199 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690637 ms - Host latency: 0.919464 ms (enqueue 0.317416 ms)
|
| 200 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68938 ms - Host latency: 0.918329 ms (enqueue 0.305902 ms)
|
| 201 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68999 ms - Host latency: 0.919446 ms (enqueue 0.310776 ms)
|
| 202 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689975 ms - Host latency: 0.918863 ms (enqueue 0.306528 ms)
|
| 203 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690698 ms - Host latency: 0.918652 ms (enqueue 0.311133 ms)
|
| 204 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690353 ms - Host latency: 0.918417 ms (enqueue 0.3073 ms)
|
| 205 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690497 ms - Host latency: 0.919394 ms (enqueue 0.30517 ms)
|
| 206 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689841 ms - Host latency: 0.918314 ms (enqueue 0.309009 ms)
|
| 207 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690335 ms - Host latency: 0.919134 ms (enqueue 0.304581 ms)
|
| 208 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690173 ms - Host latency: 0.91835 ms (enqueue 0.308972 ms)
|
| 209 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.917984 ms (enqueue 0.304126 ms)
|
| 210 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690021 ms - Host latency: 0.9185 ms (enqueue 0.304776 ms)
|
| 211 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689978 ms - Host latency: 0.919333 ms (enqueue 0.310419 ms)
|
| 212 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690384 ms - Host latency: 0.91843 ms (enqueue 0.304224 ms)
|
| 213 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691367 ms - Host latency: 0.920929 ms (enqueue 0.306961 ms)
|
| 214 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690677 ms - Host latency: 0.919189 ms (enqueue 0.304025 ms)
|
| 215 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690689 ms - Host latency: 0.918689 ms (enqueue 0.303949 ms)
|
| 216 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690024 ms - Host latency: 0.919098 ms (enqueue 0.306903 ms)
|
| 217 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689938 ms - Host latency: 0.91944 ms (enqueue 0.304733 ms)
|
| 218 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689774 ms - Host latency: 0.918259 ms (enqueue 0.308612 ms)
|
| 219 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.919043 ms (enqueue 0.304071 ms)
|
| 220 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690753 ms - Host latency: 0.919119 ms (enqueue 0.306995 ms)
|
| 221 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689517 ms - Host latency: 0.918619 ms (enqueue 0.304254 ms)
|
| 222 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690656 ms - Host latency: 0.918671 ms (enqueue 0.303894 ms)
|
| 223 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689618 ms - Host latency: 0.918146 ms (enqueue 0.310718 ms)
|
| 224 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689941 ms - Host latency: 0.919299 ms (enqueue 0.30426 ms)
|
| 225 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690265 ms - Host latency: 0.917975 ms (enqueue 0.308655 ms)
|
| 226 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690033 ms - Host latency: 0.919293 ms (enqueue 0.304211 ms)
|
| 227 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690607 ms - Host latency: 0.920068 ms (enqueue 0.316748 ms)
|
| 228 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.6896 ms - Host latency: 0.918976 ms (enqueue 0.314392 ms)
|
| 229 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689691 ms - Host latency: 0.917535 ms (enqueue 0.308551 ms)
|
| 230 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690326 ms - Host latency: 0.919257 ms (enqueue 0.309924 ms)
|
| 231 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690155 ms - Host latency: 0.918597 ms (enqueue 0.305078 ms)
|
| 232 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68949 ms - Host latency: 0.917371 ms (enqueue 0.30827 ms)
|
| 233 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689886 ms - Host latency: 0.918903 ms (enqueue 0.306262 ms)
|
| 234 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690118 ms - Host latency: 0.919134 ms (enqueue 0.303931 ms)
|
| 235 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68924 ms - Host latency: 0.917847 ms (enqueue 0.316858 ms)
|
| 236 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689557 ms - Host latency: 0.918066 ms (enqueue 0.304633 ms)
|
| 237 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689648 ms - Host latency: 0.917242 ms (enqueue 0.308942 ms)
|
| 238 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690375 ms - Host latency: 0.919141 ms (enqueue 0.304797 ms)
|
| 239 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689996 ms - Host latency: 0.918604 ms (enqueue 0.303674 ms)
|
| 240 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689734 ms - Host latency: 0.919196 ms (enqueue 0.307715 ms)
|
| 241 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689691 ms - Host latency: 0.918817 ms (enqueue 0.303552 ms)
|
| 242 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690411 ms - Host latency: 0.918604 ms (enqueue 0.30636 ms)
|
| 243 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689685 ms - Host latency: 0.91734 ms (enqueue 0.303522 ms)
|
| 244 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689709 ms - Host latency: 0.918915 ms (enqueue 0.303973 ms)
|
| 245 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690326 ms - Host latency: 0.91972 ms (enqueue 0.327997 ms)
|
| 246 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690265 ms - Host latency: 0.918787 ms (enqueue 0.305579 ms)
|
| 247 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690289 ms - Host latency: 0.917981 ms (enqueue 0.329095 ms)
|
| 248 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689282 ms - Host latency: 0.916864 ms (enqueue 0.301672 ms)
|
| 249 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689569 ms - Host latency: 0.917041 ms (enqueue 0.300519 ms)
|
| 250 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.688989 ms - Host latency: 0.9185 ms (enqueue 0.313867 ms)
|
| 251 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689734 ms - Host latency: 0.917377 ms (enqueue 0.29621 ms)
|
| 252 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690289 ms - Host latency: 0.919733 ms (enqueue 0.30246 ms)
|
| 253 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689679 ms - Host latency: 0.91861 ms (enqueue 0.297205 ms)
|
| 254 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689545 ms - Host latency: 0.918488 ms (enqueue 0.303094 ms)
|
| 255 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.694342 ms - Host latency: 0.922125 ms (enqueue 0.474976 ms)
|
| 256 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.715472 ms - Host latency: 0.942883 ms (enqueue 0.701794 ms)
|
| 257 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.709644 ms - Host latency: 0.936145 ms (enqueue 0.769312 ms)
|
| 258 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.718701 ms - Host latency: 0.943799 ms (enqueue 0.6901 ms)
|
| 259 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.718603 ms - Host latency: 0.946008 ms (enqueue 0.690704 ms)
|
| 260 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69046 ms - Host latency: 0.918903 ms (enqueue 0.360406 ms)
|
| 261 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690356 ms - Host latency: 0.918756 ms (enqueue 0.349268 ms)
|
| 262 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69032 ms - Host latency: 0.919409 ms (enqueue 0.333716 ms)
|
| 263 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690222 ms - Host latency: 0.918671 ms (enqueue 0.331396 ms)
|
| 264 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689868 ms - Host latency: 0.919006 ms (enqueue 0.334015 ms)
|
| 265 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690753 ms - Host latency: 0.919659 ms (enqueue 0.324933 ms)
|
| 266 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689954 ms - Host latency: 0.917731 ms (enqueue 0.350427 ms)
|
| 267 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690613 ms - Host latency: 0.918457 ms (enqueue 0.309082 ms)
|
| 268 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69035 ms - Host latency: 0.918524 ms (enqueue 0.314954 ms)
|
| 269 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689301 ms - Host latency: 0.917505 ms (enqueue 0.306165 ms)
|
| 270 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689825 ms - Host latency: 0.91897 ms (enqueue 0.298175 ms)
|
| 271 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690277 ms - Host latency: 0.919324 ms (enqueue 0.307776 ms)
|
| 272 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689795 ms - Host latency: 0.918457 ms (enqueue 0.29693 ms)
|
| 273 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690161 ms - Host latency: 0.918488 ms (enqueue 0.302686 ms)
|
| 274 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689954 ms - Host latency: 0.919214 ms (enqueue 0.296948 ms)
|
| 275 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689703 ms - Host latency: 0.918762 ms (enqueue 0.298163 ms)
|
| 276 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690308 ms - Host latency: 0.919397 ms (enqueue 0.30072 ms)
|
| 277 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689264 ms - Host latency: 0.916876 ms (enqueue 0.296857 ms)
|
| 278 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689587 ms - Host latency: 0.917297 ms (enqueue 0.335138 ms)
|
| 279 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69043 ms - Host latency: 0.918445 ms (enqueue 0.314209 ms)
|
| 280 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690369 ms - Host latency: 0.92027 ms (enqueue 0.310297 ms)
|
| 281 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689935 ms - Host latency: 0.919324 ms (enqueue 0.302466 ms)
|
| 282 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690234 ms - Host latency: 0.918066 ms (enqueue 0.296344 ms)
|
| 283 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689923 ms - Host latency: 0.918359 ms (enqueue 0.304181 ms)
|
| 284 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690662 ms - Host latency: 0.919678 ms (enqueue 0.296429 ms)
|
| 285 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690063 ms - Host latency: 0.91933 ms (enqueue 0.300616 ms)
|
| 286 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689819 ms - Host latency: 0.918268 ms (enqueue 0.298126 ms)
|
| 287 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690515 ms - Host latency: 0.919702 ms (enqueue 0.295563 ms)
|
| 288 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.688947 ms - Host latency: 0.91734 ms (enqueue 0.303027 ms)
|
| 289 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690137 ms - Host latency: 0.91817 ms (enqueue 0.295483 ms)
|
| 290 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690405 ms - Host latency: 0.919525 ms (enqueue 0.304822 ms)
|
| 291 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69035 ms - Host latency: 0.919421 ms (enqueue 0.296484 ms)
|
| 292 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689587 ms - Host latency: 0.918805 ms (enqueue 0.294489 ms)
|
| 293 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690161 ms - Host latency: 0.917981 ms (enqueue 0.315344 ms)
|
| 294 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690302 ms - Host latency: 0.918701 ms (enqueue 0.306903 ms)
|
| 295 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689606 ms - Host latency: 0.918195 ms (enqueue 0.322778 ms)
|
| 296 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689307 ms - Host latency: 0.917505 ms (enqueue 0.313074 ms)
|
| 297 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691821 ms - Host latency: 0.921606 ms (enqueue 0.365295 ms)
|
| 298 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.707043 ms - Host latency: 0.935779 ms (enqueue 0.719421 ms)
|
| 299 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.706421 ms - Host latency: 0.933386 ms (enqueue 0.640784 ms)
|
| 300 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.701611 ms - Host latency: 0.9297 ms (enqueue 0.666016 ms)
|
| 301 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.695691 ms - Host latency: 0.923389 ms (enqueue 0.510803 ms)
|
| 302 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690784 ms - Host latency: 0.919592 ms (enqueue 0.318408 ms)
|
| 303 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689563 ms - Host latency: 0.917297 ms (enqueue 0.311609 ms)
|
| 304 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690259 ms - Host latency: 0.916589 ms (enqueue 0.318579 ms)
|
| 305 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689575 ms - Host latency: 0.917053 ms (enqueue 0.301746 ms)
|
| 306 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689612 ms - Host latency: 0.917725 ms (enqueue 0.306873 ms)
|
| 307 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.701013 ms - Host latency: 0.929688 ms (enqueue 0.497681 ms)
|
| 308 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.717651 ms - Host latency: 0.945386 ms (enqueue 0.707361 ms)
|
| 309 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.695715 ms - Host latency: 0.923706 ms (enqueue 0.517566 ms)
|
| 310 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.917651 ms (enqueue 0.316309 ms)
|
| 311 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689612 ms - Host latency: 0.915637 ms (enqueue 0.317175 ms)
|
| 312 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.688354 ms - Host latency: 0.916211 ms (enqueue 0.30564 ms)
|
| 313 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690479 ms - Host latency: 0.917249 ms (enqueue 0.316724 ms)
|
| 314 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690979 ms - Host latency: 0.918042 ms (enqueue 0.320984 ms)
|
| 315 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689539 ms - Host latency: 0.918726 ms (enqueue 0.301123 ms)
|
| 316 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689685 ms - Host latency: 0.91842 ms (enqueue 0.306287 ms)
|
| 317 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690356 ms - Host latency: 0.918909 ms (enqueue 0.303784 ms)
|
| 318 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689673 ms - Host latency: 0.918787 ms (enqueue 0.301929 ms)
|
| 319 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.70437 ms - Host latency: 0.93418 ms (enqueue 0.665527 ms)
|
| 320 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.692505 ms - Host latency: 0.919165 ms (enqueue 0.526782 ms)
|
| 321 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690015 ms - Host latency: 0.919373 ms (enqueue 0.329028 ms)
|
| 322 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689587 ms - Host latency: 0.917249 ms (enqueue 0.306995 ms)
|
| 323 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690173 ms - Host latency: 0.919275 ms (enqueue 0.304346 ms)
|
| 324 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690259 ms - Host latency: 0.918494 ms (enqueue 0.312183 ms)
|
| 325 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690015 ms - Host latency: 0.918799 ms (enqueue 0.298303 ms)
|
| 326 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.6901 ms - Host latency: 0.918738 ms (enqueue 0.311157 ms)
|
| 327 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690015 ms - Host latency: 0.91875 ms (enqueue 0.310388 ms)
|
| 328 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.6901 ms - Host latency: 0.919067 ms (enqueue 0.296741 ms)
|
| 329 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691126 ms - Host latency: 0.918457 ms (enqueue 0.351941 ms)
|
| 330 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689612 ms - Host latency: 0.917249 ms (enqueue 0.32312 ms)
|
| 331 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689819 ms - Host latency: 0.916113 ms (enqueue 0.327014 ms)
|
| 332 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.701514 ms - Host latency: 0.929968 ms (enqueue 0.501367 ms)
|
| 333 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690344 ms - Host latency: 0.917322 ms (enqueue 0.345032 ms)
|
| 334 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689661 ms - Host latency: 0.917627 ms (enqueue 0.319043 ms)
|
| 335 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689563 ms - Host latency: 0.917139 ms (enqueue 0.331885 ms)
|
| 336 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689661 ms - Host latency: 0.918249 ms (enqueue 0.324353 ms)
|
| 337 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689758 ms - Host latency: 0.918066 ms (enqueue 0.31875 ms)
|
| 338 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690295 ms - Host latency: 0.917737 ms (enqueue 0.335303 ms)
|
| 339 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68927 ms - Host latency: 0.917798 ms (enqueue 0.310901 ms)
|
| 340 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690271 ms - Host latency: 0.918677 ms (enqueue 0.299463 ms)
|
| 341 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689954 ms - Host latency: 0.919104 ms (enqueue 0.311975 ms)
|
| 342 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69043 ms - Host latency: 0.919348 ms (enqueue 0.304968 ms)
|
| 343 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691223 ms - Host latency: 0.920886 ms (enqueue 0.296521 ms)
|
| 344 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689539 ms - Host latency: 0.91687 ms (enqueue 0.303601 ms)
|
| 345 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.918958 ms (enqueue 0.298071 ms)
|
| 346 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690222 ms - Host latency: 0.919592 ms (enqueue 0.30127 ms)
|
| 347 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690186 ms - Host latency: 0.918555 ms (enqueue 0.297644 ms)
|
| 348 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.918835 ms (enqueue 0.295374 ms)
|
| 349 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690076 ms - Host latency: 0.918555 ms (enqueue 0.30033 ms)
|
| 350 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689868 ms - Host latency: 0.918726 ms (enqueue 0.297778 ms)
|
| 351 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689941 ms - Host latency: 0.919373 ms (enqueue 0.296204 ms)
|
| 352 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.701758 ms - Host latency: 0.929065 ms (enqueue 0.535925 ms)
|
| 353 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.697412 ms - Host latency: 0.924792 ms (enqueue 0.523328 ms)
|
| 354 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.706567 ms - Host latency: 0.933167 ms (enqueue 0.761841 ms)
|
| 355 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.713196 ms - Host latency: 0.940991 ms (enqueue 0.696399 ms)
|
| 356 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.704077 ms - Host latency: 0.930005 ms (enqueue 0.529431 ms)
|
| 357 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691284 ms - Host latency: 0.918665 ms (enqueue 0.415002 ms)
|
| 358 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.715051 ms - Host latency: 0.941028 ms (enqueue 0.795215 ms)
|
| 359 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.717786 ms - Host latency: 0.943445 ms (enqueue 0.691003 ms)
|
| 360 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.719336 ms - Host latency: 0.945666 ms (enqueue 0.691101 ms)
|
| 361 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.715613 ms - Host latency: 0.943372 ms (enqueue 0.639624 ms)
|
| 362 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.707312 ms - Host latency: 0.933508 ms (enqueue 0.736902 ms)
|
| 363 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.716309 ms - Host latency: 0.942554 ms (enqueue 0.693262 ms)
|
| 364 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.719824 ms - Host latency: 0.946863 ms (enqueue 0.689026 ms)
|
| 365 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.717737 ms - Host latency: 0.945935 ms (enqueue 0.690942 ms)
|
| 366 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.707617 ms - Host latency: 0.935706 ms (enqueue 0.586279 ms)
|
| 367 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689709 ms - Host latency: 0.916797 ms (enqueue 0.358435 ms)
|
| 368 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690637 ms - Host latency: 0.918933 ms (enqueue 0.336084 ms)
|
| 369 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690234 ms - Host latency: 0.916626 ms (enqueue 0.355444 ms)
|
| 370 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691333 ms - Host latency: 0.919202 ms (enqueue 0.318469 ms)
|
| 371 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69054 ms - Host latency: 0.918237 ms (enqueue 0.35614 ms)
|
| 372 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.6901 ms - Host latency: 0.917456 ms (enqueue 0.331787 ms)
|
| 373 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690662 ms - Host latency: 0.919409 ms (enqueue 0.342407 ms)
|
| 374 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690527 ms - Host latency: 0.916821 ms (enqueue 0.332532 ms)
|
| 375 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691162 ms - Host latency: 0.919019 ms (enqueue 0.34375 ms)
|
| 376 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690356 ms - Host latency: 0.918408 ms (enqueue 0.351331 ms)
|
| 377 |
+
[01/20/2026-06:55:08] [I] ... Omitting 296 lines
|
| 378 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69021 ms - Host latency: 0.919617 ms (enqueue 0.311804 ms)
|
| 379 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.917834 ms (enqueue 0.318018 ms)
|
| 380 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689417 ms - Host latency: 0.916675 ms (enqueue 0.340979 ms)
|
| 381 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690857 ms - Host latency: 0.918872 ms (enqueue 0.338928 ms)
|
| 382 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.919299 ms (enqueue 0.314258 ms)
|
| 383 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690771 ms - Host latency: 0.919128 ms (enqueue 0.325146 ms)
|
| 384 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.694763 ms - Host latency: 0.924951 ms (enqueue 0.450171 ms)
|
| 385 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.698474 ms - Host latency: 0.927368 ms (enqueue 0.650024 ms)
|
| 386 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.692444 ms - Host latency: 0.918677 ms (enqueue 0.768152 ms)
|
| 387 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69873 ms - Host latency: 0.925964 ms (enqueue 0.735632 ms)
|
| 388 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.706982 ms - Host latency: 0.93623 ms (enqueue 0.70614 ms)
|
| 389 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.707605 ms - Host latency: 0.94021 ms (enqueue 0.709253 ms)
|
| 390 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.718201 ms - Host latency: 0.943115 ms (enqueue 0.693726 ms)
|
| 391 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.719031 ms - Host latency: 0.945154 ms (enqueue 0.694775 ms)
|
| 392 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.718249 ms - Host latency: 0.943994 ms (enqueue 0.693103 ms)
|
| 393 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.71991 ms - Host latency: 0.947119 ms (enqueue 0.689917 ms)
|
| 394 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.720251 ms - Host latency: 0.948523 ms (enqueue 0.698315 ms)
|
| 395 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.720349 ms - Host latency: 0.947461 ms (enqueue 0.696851 ms)
|
| 396 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.719177 ms - Host latency: 0.94646 ms (enqueue 0.68866 ms)
|
| 397 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.716016 ms - Host latency: 0.943555 ms (enqueue 0.698987 ms)
|
| 398 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690161 ms - Host latency: 0.918799 ms (enqueue 0.345935 ms)
|
| 399 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690344 ms - Host latency: 0.918359 ms (enqueue 0.330579 ms)
|
| 400 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689954 ms - Host latency: 0.918335 ms (enqueue 0.310767 ms)
|
| 401 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.696082 ms - Host latency: 0.923743 ms (enqueue 0.714478 ms)
|
| 402 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689929 ms - Host latency: 0.917468 ms (enqueue 0.31615 ms)
|
| 403 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.695312 ms - Host latency: 0.922803 ms (enqueue 0.626025 ms)
|
| 404 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.702624 ms - Host latency: 0.929138 ms (enqueue 0.731677 ms)
|
| 405 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.706348 ms - Host latency: 0.933691 ms (enqueue 0.615845 ms)
|
| 406 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690308 ms - Host latency: 0.919275 ms (enqueue 0.317114 ms)
|
| 407 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689685 ms - Host latency: 0.91803 ms (enqueue 0.324573 ms)
|
| 408 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690405 ms - Host latency: 0.916394 ms (enqueue 0.341711 ms)
|
| 409 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690649 ms - Host latency: 0.918445 ms (enqueue 0.326392 ms)
|
| 410 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.694031 ms - Host latency: 0.922705 ms (enqueue 0.517664 ms)
|
| 411 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691187 ms - Host latency: 0.917615 ms (enqueue 0.501086 ms)
|
| 412 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689685 ms - Host latency: 0.917102 ms (enqueue 0.315588 ms)
|
| 413 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689429 ms - Host latency: 0.917139 ms (enqueue 0.328564 ms)
|
| 414 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689136 ms - Host latency: 0.918042 ms (enqueue 0.306763 ms)
|
| 415 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690454 ms - Host latency: 0.920044 ms (enqueue 0.313452 ms)
|
| 416 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689014 ms - Host latency: 0.917261 ms (enqueue 0.313892 ms)
|
| 417 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689917 ms - Host latency: 0.91875 ms (enqueue 0.302026 ms)
|
| 418 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689404 ms - Host latency: 0.91709 ms (enqueue 0.330835 ms)
|
| 419 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689624 ms - Host latency: 0.917798 ms (enqueue 0.313574 ms)
|
| 420 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.702563 ms - Host latency: 0.929248 ms (enqueue 0.510156 ms)
|
| 421 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.695557 ms - Host latency: 0.923047 ms (enqueue 0.406201 ms)
|
| 422 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689795 ms - Host latency: 0.917188 ms (enqueue 0.323291 ms)
|
| 423 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689502 ms - Host latency: 0.915283 ms (enqueue 0.328687 ms)
|
| 424 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690576 ms - Host latency: 0.91875 ms (enqueue 0.306177 ms)
|
| 425 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690161 ms - Host latency: 0.918457 ms (enqueue 0.316968 ms)
|
| 426 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690503 ms - Host latency: 0.918848 ms (enqueue 0.304053 ms)
|
| 427 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690088 ms - Host latency: 0.916992 ms (enqueue 0.310889 ms)
|
| 428 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689502 ms - Host latency: 0.918286 ms (enqueue 0.307983 ms)
|
| 429 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689526 ms - Host latency: 0.917993 ms (enqueue 0.305688 ms)
|
| 430 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689966 ms - Host latency: 0.91936 ms (enqueue 0.315674 ms)
|
| 431 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690625 ms - Host latency: 0.918701 ms (enqueue 0.30459 ms)
|
| 432 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689868 ms - Host latency: 0.918628 ms (enqueue 0.334082 ms)
|
| 433 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689185 ms - Host latency: 0.918188 ms (enqueue 0.3198 ms)
|
| 434 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690308 ms - Host latency: 0.919336 ms (enqueue 0.310547 ms)
|
| 435 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690015 ms - Host latency: 0.91731 ms (enqueue 0.334106 ms)
|
| 436 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689404 ms - Host latency: 0.917651 ms (enqueue 0.311279 ms)
|
| 437 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689722 ms - Host latency: 0.91897 ms (enqueue 0.310278 ms)
|
| 438 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689966 ms - Host latency: 0.918701 ms (enqueue 0.306641 ms)
|
| 439 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689453 ms - Host latency: 0.916455 ms (enqueue 0.305151 ms)
|
| 440 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690161 ms - Host latency: 0.91897 ms (enqueue 0.312524 ms)
|
| 441 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689624 ms - Host latency: 0.91853 ms (enqueue 0.305322 ms)
|
| 442 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689478 ms - Host latency: 0.918066 ms (enqueue 0.308813 ms)
|
| 443 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690771 ms - Host latency: 0.919727 ms (enqueue 0.306445 ms)
|
| 444 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689917 ms - Host latency: 0.916846 ms (enqueue 0.307715 ms)
|
| 445 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689795 ms - Host latency: 0.919019 ms (enqueue 0.308496 ms)
|
| 446 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691406 ms - Host latency: 0.91958 ms (enqueue 0.328516 ms)
|
| 447 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690747 ms - Host latency: 0.917603 ms (enqueue 0.340942 ms)
|
| 448 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690088 ms - Host latency: 0.918726 ms (enqueue 0.323804 ms)
|
| 449 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690112 ms - Host latency: 0.917627 ms (enqueue 0.310669 ms)
|
| 450 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689966 ms - Host latency: 0.918042 ms (enqueue 0.313257 ms)
|
| 451 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69104 ms - Host latency: 0.92019 ms (enqueue 0.315601 ms)
|
| 452 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690161 ms - Host latency: 0.919434 ms (enqueue 0.308643 ms)
|
| 453 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689673 ms - Host latency: 0.917895 ms (enqueue 0.308374 ms)
|
| 454 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.918555 ms (enqueue 0.305469 ms)
|
| 455 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689478 ms - Host latency: 0.917969 ms (enqueue 0.321387 ms)
|
| 456 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689795 ms - Host latency: 0.917798 ms (enqueue 0.396948 ms)
|
| 457 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689771 ms - Host latency: 0.917725 ms (enqueue 0.323877 ms)
|
| 458 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690259 ms - Host latency: 0.917871 ms (enqueue 0.313403 ms)
|
| 459 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690332 ms - Host latency: 0.919312 ms (enqueue 0.302246 ms)
|
| 460 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68999 ms - Host latency: 0.918457 ms (enqueue 0.308813 ms)
|
| 461 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690186 ms - Host latency: 0.918774 ms (enqueue 0.299097 ms)
|
| 462 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690601 ms - Host latency: 0.918335 ms (enqueue 0.303711 ms)
|
| 463 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690356 ms - Host latency: 0.919067 ms (enqueue 0.299097 ms)
|
| 464 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689893 ms - Host latency: 0.918799 ms (enqueue 0.297729 ms)
|
| 465 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689111 ms - Host latency: 0.917554 ms (enqueue 0.306226 ms)
|
| 466 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689893 ms - Host latency: 0.91875 ms (enqueue 0.29668 ms)
|
| 467 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690137 ms - Host latency: 0.918823 ms (enqueue 0.327393 ms)
|
| 468 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690479 ms - Host latency: 0.91897 ms (enqueue 0.31355 ms)
|
| 469 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691504 ms - Host latency: 0.919629 ms (enqueue 0.299219 ms)
|
| 470 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.918433 ms (enqueue 0.310107 ms)
|
| 471 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.694556 ms - Host latency: 0.920459 ms (enqueue 0.55293 ms)
|
| 472 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69707 ms - Host latency: 0.925537 ms (enqueue 0.54646 ms)
|
| 473 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69978 ms - Host latency: 0.92688 ms (enqueue 0.650195 ms)
|
| 474 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689331 ms - Host latency: 0.918677 ms (enqueue 0.333594 ms)
|
| 475 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690552 ms - Host latency: 0.917627 ms (enqueue 0.350464 ms)
|
| 476 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690405 ms - Host latency: 0.919653 ms (enqueue 0.329175 ms)
|
| 477 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690771 ms - Host latency: 0.919678 ms (enqueue 0.34314 ms)
|
| 478 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690259 ms - Host latency: 0.918604 ms (enqueue 0.310547 ms)
|
| 479 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691528 ms - Host latency: 0.917139 ms (enqueue 0.337305 ms)
|
| 480 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690283 ms - Host latency: 0.918066 ms (enqueue 0.321265 ms)
|
| 481 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690088 ms - Host latency: 0.918994 ms (enqueue 0.30061 ms)
|
| 482 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690088 ms - Host latency: 0.918018 ms (enqueue 0.328687 ms)
|
| 483 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.6896 ms - Host latency: 0.916699 ms (enqueue 0.301367 ms)
|
| 484 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690186 ms - Host latency: 0.918262 ms (enqueue 0.306299 ms)
|
| 485 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689966 ms - Host latency: 0.918872 ms (enqueue 0.300049 ms)
|
| 486 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689624 ms - Host latency: 0.918506 ms (enqueue 0.298901 ms)
|
| 487 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689819 ms - Host latency: 0.918237 ms (enqueue 0.329932 ms)
|
| 488 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689795 ms - Host latency: 0.918091 ms (enqueue 0.309961 ms)
|
| 489 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689966 ms - Host latency: 0.916357 ms (enqueue 0.336572 ms)
|
| 490 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689844 ms - Host latency: 0.91626 ms (enqueue 0.317749 ms)
|
| 491 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.917627 ms (enqueue 0.312061 ms)
|
| 492 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68855 ms - Host latency: 0.91687 ms (enqueue 0.330444 ms)
|
| 493 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690381 ms - Host latency: 0.916602 ms (enqueue 0.317407 ms)
|
| 494 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689966 ms - Host latency: 0.916846 ms (enqueue 0.332202 ms)
|
| 495 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689453 ms - Host latency: 0.917212 ms (enqueue 0.316992 ms)
|
| 496 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690088 ms - Host latency: 0.918579 ms (enqueue 0.310327 ms)
|
| 497 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690332 ms - Host latency: 0.918896 ms (enqueue 0.337695 ms)
|
| 498 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690137 ms - Host latency: 0.918921 ms (enqueue 0.323193 ms)
|
| 499 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.6896 ms - Host latency: 0.919165 ms (enqueue 0.311816 ms)
|
| 500 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690332 ms - Host latency: 0.917773 ms (enqueue 0.324341 ms)
|
| 501 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.918164 ms (enqueue 0.304761 ms)
|
| 502 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689771 ms - Host latency: 0.91875 ms (enqueue 0.31106 ms)
|
| 503 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689648 ms - Host latency: 0.918481 ms (enqueue 0.305933 ms)
|
| 504 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689673 ms - Host latency: 0.916626 ms (enqueue 0.314478 ms)
|
| 505 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689478 ms - Host latency: 0.917236 ms (enqueue 0.322241 ms)
|
| 506 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689478 ms - Host latency: 0.918628 ms (enqueue 0.300293 ms)
|
| 507 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689551 ms - Host latency: 0.919336 ms (enqueue 0.308325 ms)
|
| 508 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690356 ms - Host latency: 0.919434 ms (enqueue 0.301733 ms)
|
| 509 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690112 ms - Host latency: 0.918945 ms (enqueue 0.297461 ms)
|
| 510 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689624 ms - Host latency: 0.916699 ms (enqueue 0.307007 ms)
|
| 511 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689209 ms - Host latency: 0.917432 ms (enqueue 0.298267 ms)
|
| 512 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689844 ms - Host latency: 0.919189 ms (enqueue 0.300391 ms)
|
| 513 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689917 ms - Host latency: 0.918848 ms (enqueue 0.29856 ms)
|
| 514 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690039 ms - Host latency: 0.919165 ms (enqueue 0.29751 ms)
|
| 515 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689502 ms - Host latency: 0.918628 ms (enqueue 0.302295 ms)
|
| 516 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690088 ms - Host latency: 0.917798 ms (enqueue 0.298877 ms)
|
| 517 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690356 ms - Host latency: 0.919092 ms (enqueue 0.299731 ms)
|
| 518 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689551 ms - Host latency: 0.918384 ms (enqueue 0.299585 ms)
|
| 519 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690186 ms - Host latency: 0.91936 ms (enqueue 0.296289 ms)
|
| 520 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.918066 ms (enqueue 0.307227 ms)
|
| 521 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689526 ms - Host latency: 0.916919 ms (enqueue 0.297559 ms)
|
| 522 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689941 ms - Host latency: 0.918506 ms (enqueue 0.296533 ms)
|
| 523 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689209 ms - Host latency: 0.916406 ms (enqueue 0.328296 ms)
|
| 524 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690234 ms - Host latency: 0.916479 ms (enqueue 0.354785 ms)
|
| 525 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690356 ms - Host latency: 0.918213 ms (enqueue 0.356226 ms)
|
| 526 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690503 ms - Host latency: 0.917114 ms (enqueue 0.350366 ms)
|
| 527 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.698608 ms - Host latency: 0.926367 ms (enqueue 0.798242 ms)
|
| 528 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.694922 ms - Host latency: 0.921069 ms (enqueue 0.75918 ms)
|
| 529 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.698682 ms - Host latency: 0.92605 ms (enqueue 0.724487 ms)
|
| 530 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.697974 ms - Host latency: 0.925366 ms (enqueue 0.73479 ms)
|
| 531 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.696802 ms - Host latency: 0.923779 ms (enqueue 0.744312 ms)
|
| 532 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.698462 ms - Host latency: 0.92666 ms (enqueue 0.7354 ms)
|
| 533 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.694556 ms - Host latency: 0.918848 ms (enqueue 0.416211 ms)
|
| 534 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690576 ms - Host latency: 0.917139 ms (enqueue 0.383203 ms)
|
| 535 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690649 ms - Host latency: 0.919678 ms (enqueue 0.369727 ms)
|
| 536 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690137 ms - Host latency: 0.919507 ms (enqueue 0.33772 ms)
|
| 537 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689429 ms - Host latency: 0.918604 ms (enqueue 0.307129 ms)
|
| 538 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690649 ms - Host latency: 0.918286 ms (enqueue 0.327588 ms)
|
| 539 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689282 ms - Host latency: 0.918213 ms (enqueue 0.318359 ms)
|
| 540 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68999 ms - Host latency: 0.919214 ms (enqueue 0.356299 ms)
|
| 541 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690112 ms - Host latency: 0.919507 ms (enqueue 0.315234 ms)
|
| 542 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690063 ms - Host latency: 0.918628 ms (enqueue 0.308447 ms)
|
| 543 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690259 ms - Host latency: 0.919482 ms (enqueue 0.316162 ms)
|
| 544 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689453 ms - Host latency: 0.918286 ms (enqueue 0.301514 ms)
|
| 545 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.68999 ms - Host latency: 0.919287 ms (enqueue 0.309766 ms)
|
| 546 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689575 ms - Host latency: 0.918872 ms (enqueue 0.298193 ms)
|
| 547 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690161 ms - Host latency: 0.918188 ms (enqueue 0.334497 ms)
|
| 548 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689429 ms - Host latency: 0.919116 ms (enqueue 0.336182 ms)
|
| 549 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689893 ms - Host latency: 0.919067 ms (enqueue 0.317456 ms)
|
| 550 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689673 ms - Host latency: 0.918042 ms (enqueue 0.323413 ms)
|
| 551 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691089 ms - Host latency: 0.919873 ms (enqueue 0.313867 ms)
|
| 552 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690601 ms - Host latency: 0.920093 ms (enqueue 0.313794 ms)
|
| 553 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689819 ms - Host latency: 0.918945 ms (enqueue 0.315161 ms)
|
| 554 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689624 ms - Host latency: 0.917139 ms (enqueue 0.313135 ms)
|
| 555 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.917505 ms (enqueue 0.32124 ms)
|
| 556 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689844 ms - Host latency: 0.918896 ms (enqueue 0.317334 ms)
|
| 557 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689697 ms - Host latency: 0.91875 ms (enqueue 0.310889 ms)
|
| 558 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690625 ms - Host latency: 0.919922 ms (enqueue 0.315112 ms)
|
| 559 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690063 ms - Host latency: 0.918774 ms (enqueue 0.313403 ms)
|
| 560 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690601 ms - Host latency: 0.919043 ms (enqueue 0.315259 ms)
|
| 561 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690698 ms - Host latency: 0.919775 ms (enqueue 0.311011 ms)
|
| 562 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.918286 ms (enqueue 0.311475 ms)
|
| 563 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690015 ms - Host latency: 0.918457 ms (enqueue 0.315015 ms)
|
| 564 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689868 ms - Host latency: 0.919385 ms (enqueue 0.31062 ms)
|
| 565 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689648 ms - Host latency: 0.917895 ms (enqueue 0.315234 ms)
|
| 566 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689478 ms - Host latency: 0.918701 ms (enqueue 0.311011 ms)
|
| 567 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689844 ms - Host latency: 0.918774 ms (enqueue 0.313403 ms)
|
| 568 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.691675 ms - Host latency: 0.920996 ms (enqueue 0.311646 ms)
|
| 569 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689819 ms - Host latency: 0.918945 ms (enqueue 0.310596 ms)
|
| 570 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69082 ms - Host latency: 0.920337 ms (enqueue 0.31416 ms)
|
| 571 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689746 ms - Host latency: 0.918872 ms (enqueue 0.3104 ms)
|
| 572 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690845 ms - Host latency: 0.920166 ms (enqueue 0.314136 ms)
|
| 573 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690405 ms - Host latency: 0.919849 ms (enqueue 0.311157 ms)
|
| 574 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690625 ms - Host latency: 0.919482 ms (enqueue 0.323853 ms)
|
| 575 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.690894 ms - Host latency: 0.918652 ms (enqueue 0.343188 ms)
|
| 576 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.69021 ms - Host latency: 0.919263 ms (enqueue 0.314526 ms)
|
| 577 |
+
[01/20/2026-06:55:08] [I] Average on 10 runs - GPU latency: 0.689722 ms - Host latency: 0.917285 ms (enqueue 0.321753 ms)
|
| 578 |
+
[01/20/2026-06:55:08] [I]
|
| 579 |
+
[01/20/2026-06:55:08] [I] === Performance summary ===
|
| 580 |
+
[01/20/2026-06:55:08] [I] Throughput: 1430.91 qps
|
| 581 |
+
[01/20/2026-06:55:08] [I] Latency: min = 0.907776 ms, max = 1.0553 ms, mean = 0.92099 ms, median = 0.919189 ms, percentile(90%) = 0.92807 ms, percentile(95%) = 0.943848 ms, percentile(99%) = 0.948914 ms
|
| 582 |
+
[01/20/2026-06:55:08] [I] Enqueue Time: min = 0.29187 ms, max = 1.0058 ms, mean = 0.373465 ms, median = 0.311279 ms, percentile(90%) = 0.693726 ms, percentile(95%) = 0.723511 ms, percentile(99%) = 0.834106 ms
|
| 583 |
+
[01/20/2026-06:55:08] [I] H2D Latency: min = 0.209839 ms, max = 0.233582 ms, mean = 0.222989 ms, median = 0.224182 ms, percentile(90%) = 0.22522 ms, percentile(95%) = 0.225586 ms, percentile(99%) = 0.226562 ms
|
| 584 |
+
[01/20/2026-06:55:08] [I] GPU Compute Time: min = 0.685547 ms, max = 0.831238 ms, mean = 0.692792 ms, median = 0.690186 ms, percentile(90%) = 0.700409 ms, percentile(95%) = 0.717773 ms, percentile(99%) = 0.721069 ms
|
| 585 |
+
[01/20/2026-06:55:08] [I] D2H Latency: min = 0.00415039 ms, max = 0.0147705 ms, mean = 0.00521159 ms, median = 0.00457764 ms, percentile(90%) = 0.00585938 ms, percentile(95%) = 0.0127869 ms, percentile(99%) = 0.013916 ms
|
| 586 |
+
[01/20/2026-06:55:08] [I] Total Host Walltime: 3.00228 s
|
| 587 |
+
[01/20/2026-06:55:08] [I] Total GPU Compute Time: 2.97624 s
|
| 588 |
+
[01/20/2026-06:55:08] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 589 |
+
[01/20/2026-06:55:08] [I]
|
| 590 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_n_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_n_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_hgnetv2_n_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab904cc403ddbea1c5c84f14e5fba6ff24452a6152957e77776004db006d6e62
|
| 3 |
+
size 14839741
|
deimv2_hgnetv2_n_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ce67dc3535e345f6ac3f46e735bbc12a93c344aa46cb630f7a047089a88b7e9
|
| 3 |
+
size 14743995
|
deimv2_hgnetv2_pico_coco.engine
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad6574ce56f64f48493d8d8bec0b6f7a5fd4e4aaeae96539bf0c441a28ee2ec0
|
| 3 |
+
size 6250652
|
deimv2_hgnetv2_pico_coco.log
ADDED
|
@@ -0,0 +1,590 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
&&&& RUNNING TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_pico_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_pico_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
| 2 |
+
[01/20/2026-06:51:03] [W] optShapes is being broadcasted to minShapes for tensor orig_target_sizes
|
| 3 |
+
[01/20/2026-06:51:03] [W] optShapes is being broadcasted to maxShapes for tensor orig_target_sizes
|
| 4 |
+
[01/20/2026-06:51:03] [W] optShapes is being broadcasted to minShapes for tensor images
|
| 5 |
+
[01/20/2026-06:51:03] [W] optShapes is being broadcasted to maxShapes for tensor images
|
| 6 |
+
[01/20/2026-06:51:03] [W] Weakly-typed networks have been deprecated in TensorRT. You can use the AutoCast tool (https://nvidia.github.io/TensorRT-Model-Optimizer/guides/8_autocast.html) to convert the network to be strongly typed.
|
| 7 |
+
[01/20/2026-06:51:04] [I] === Model Options ===
|
| 8 |
+
[01/20/2026-06:51:04] [I] Format: ONNX
|
| 9 |
+
[01/20/2026-06:51:04] [I] Model: checkpoints/deimv2_hgnetv2_pico_coco.onnx
|
| 10 |
+
[01/20/2026-06:51:04] [I] Output:
|
| 11 |
+
[01/20/2026-06:51:04] [I] === Build Options ===
|
| 12 |
+
[01/20/2026-06:51:04] [I] Memory Pools: workspace: 4096 MiB, dlaSRAM: default, dlaLocalDRAM: default, dlaGlobalDRAM: default, tacticSharedMem: default
|
| 13 |
+
[01/20/2026-06:51:04] [I] avgTiming: 8
|
| 14 |
+
[01/20/2026-06:51:04] [I] Precision: FP32+FP16
|
| 15 |
+
[01/20/2026-06:51:04] [I] LayerPrecisions:
|
| 16 |
+
[01/20/2026-06:51:04] [I] Layer Device Types:
|
| 17 |
+
[01/20/2026-06:51:04] [I] Decomposable Attentions:
|
| 18 |
+
[01/20/2026-06:51:04] [I] Calibration:
|
| 19 |
+
[01/20/2026-06:51:04] [I] Refit: Disabled
|
| 20 |
+
[01/20/2026-06:51:04] [I] Strip weights: Disabled
|
| 21 |
+
[01/20/2026-06:51:04] [I] Version Compatible: Disabled
|
| 22 |
+
[01/20/2026-06:51:04] [I] ONNX Plugin InstanceNorm: Disabled
|
| 23 |
+
[01/20/2026-06:51:04] [I] ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: Disabled
|
| 24 |
+
[01/20/2026-06:51:04] [I] TensorRT runtime: full
|
| 25 |
+
[01/20/2026-06:51:04] [I] Lean DLL Path:
|
| 26 |
+
[01/20/2026-06:51:04] [I] Tempfile Controls: { in_memory: allow, temporary: allow }
|
| 27 |
+
[01/20/2026-06:51:04] [I] Exclude Lean Runtime: Disabled
|
| 28 |
+
[01/20/2026-06:51:04] [I] Sparsity: Disabled
|
| 29 |
+
[01/20/2026-06:51:04] [I] Safe mode: Disabled
|
| 30 |
+
[01/20/2026-06:51:04] [I] Build DLA standalone loadable: Disabled
|
| 31 |
+
[01/20/2026-06:51:04] [I] Allow GPU fallback for DLA: Disabled
|
| 32 |
+
[01/20/2026-06:51:04] [I] DirectIO mode: Disabled
|
| 33 |
+
[01/20/2026-06:51:04] [I] Restricted mode: Disabled
|
| 34 |
+
[01/20/2026-06:51:04] [I] Skip inference: Disabled
|
| 35 |
+
[01/20/2026-06:51:04] [I] Save engine: checkpoints/deimv2_hgnetv2_pico_coco.engine
|
| 36 |
+
[01/20/2026-06:51:04] [I] Load engine:
|
| 37 |
+
[01/20/2026-06:51:04] [I] Profiling verbosity: 0
|
| 38 |
+
[01/20/2026-06:51:04] [I] Tactic sources: Using default tactic sources
|
| 39 |
+
[01/20/2026-06:51:04] [I] timingCacheMode: local
|
| 40 |
+
[01/20/2026-06:51:04] [I] timingCacheFile:
|
| 41 |
+
[01/20/2026-06:51:04] [I] Enable Compilation Cache: Enabled
|
| 42 |
+
[01/20/2026-06:51:04] [I] Enable Monitor Memory: Disabled
|
| 43 |
+
[01/20/2026-06:51:04] [I] errorOnTimingCacheMiss: Disabled
|
| 44 |
+
[01/20/2026-06:51:04] [I] Preview Features: Use default preview flags.
|
| 45 |
+
[01/20/2026-06:51:04] [I] MaxAuxStreams: -1
|
| 46 |
+
[01/20/2026-06:51:04] [I] BuilderOptimizationLevel: 3
|
| 47 |
+
[01/20/2026-06:51:04] [I] MaxTactics: -1
|
| 48 |
+
[01/20/2026-06:51:04] [I] Calibration Profile Index: 0
|
| 49 |
+
[01/20/2026-06:51:04] [I] Weight Streaming: Disabled
|
| 50 |
+
[01/20/2026-06:51:04] [I] Runtime Platform: Same As Build
|
| 51 |
+
[01/20/2026-06:51:04] [I] Debug Tensors:
|
| 52 |
+
[01/20/2026-06:51:04] [I] Distributive Independence: Disabled
|
| 53 |
+
[01/20/2026-06:51:04] [I] Mark Unfused Tensors As Debug Tensors: Disabled
|
| 54 |
+
[01/20/2026-06:51:04] [I] Input(s)s format: fp32:CHW
|
| 55 |
+
[01/20/2026-06:51:04] [I] Output(s)s format: fp32:CHW
|
| 56 |
+
[01/20/2026-06:51:04] [I] Input build shape (profile 0): images=1x3x640x640+1x3x640x640+1x3x640x640
|
| 57 |
+
[01/20/2026-06:51:04] [I] Input build shape (profile 0): orig_target_sizes=1x2+1x2+1x2
|
| 58 |
+
[01/20/2026-06:51:04] [I] Input calibration shapes: model
|
| 59 |
+
[01/20/2026-06:51:04] [I] === System Options ===
|
| 60 |
+
[01/20/2026-06:51:04] [I] Device: 0
|
| 61 |
+
[01/20/2026-06:51:04] [I] DLACore:
|
| 62 |
+
[01/20/2026-06:51:04] [I] Plugins:
|
| 63 |
+
[01/20/2026-06:51:04] [I] setPluginsToSerialize:
|
| 64 |
+
[01/20/2026-06:51:04] [I] dynamicPlugins:
|
| 65 |
+
[01/20/2026-06:51:04] [I] ignoreParsedPluginLibs: 0
|
| 66 |
+
[01/20/2026-06:51:04] [I]
|
| 67 |
+
[01/20/2026-06:51:04] [I] === Inference Options ===
|
| 68 |
+
[01/20/2026-06:51:04] [I] Batch: Explicit
|
| 69 |
+
[01/20/2026-06:51:04] [I] Input inference shape : orig_target_sizes=1x2
|
| 70 |
+
[01/20/2026-06:51:04] [I] Input inference shape : images=1x3x640x640
|
| 71 |
+
[01/20/2026-06:51:04] [I] Iterations: 10
|
| 72 |
+
[01/20/2026-06:51:04] [I] Duration: 3s (+ 200ms warm up)
|
| 73 |
+
[01/20/2026-06:51:04] [I] Sleep time: 0ms
|
| 74 |
+
[01/20/2026-06:51:04] [I] Idle time: 0ms
|
| 75 |
+
[01/20/2026-06:51:04] [I] Inference Streams: 1
|
| 76 |
+
[01/20/2026-06:51:04] [I] ExposeDMA: Disabled
|
| 77 |
+
[01/20/2026-06:51:04] [I] Data transfers: Enabled
|
| 78 |
+
[01/20/2026-06:51:04] [I] Spin-wait: Disabled
|
| 79 |
+
[01/20/2026-06:51:04] [I] Multithreading: Disabled
|
| 80 |
+
[01/20/2026-06:51:04] [I] CUDA Graph: Disabled
|
| 81 |
+
[01/20/2026-06:51:04] [I] Separate profiling: Disabled
|
| 82 |
+
[01/20/2026-06:51:04] [I] Time Deserialize: Disabled
|
| 83 |
+
[01/20/2026-06:51:04] [I] Time Refit: Disabled
|
| 84 |
+
[01/20/2026-06:51:04] [I] NVTX verbosity: 0
|
| 85 |
+
[01/20/2026-06:51:04] [I] Persistent Cache Ratio: 0
|
| 86 |
+
[01/20/2026-06:51:04] [I] Optimization Profile Index: 0
|
| 87 |
+
[01/20/2026-06:51:04] [I] Weight Streaming Budget: 100.000000%
|
| 88 |
+
[01/20/2026-06:51:04] [I] Inputs:
|
| 89 |
+
[01/20/2026-06:51:04] [I] Debug Tensor Save Destinations:
|
| 90 |
+
[01/20/2026-06:51:04] [I] Dump All Debug Tensor in Formats:
|
| 91 |
+
[01/20/2026-06:51:04] [I] === Reporting Options ===
|
| 92 |
+
[01/20/2026-06:51:04] [I] Verbose: Disabled
|
| 93 |
+
[01/20/2026-06:51:04] [I] Averages: 10 inferences
|
| 94 |
+
[01/20/2026-06:51:04] [I] Percentiles: 90,95,99
|
| 95 |
+
[01/20/2026-06:51:04] [I] Dump refittable layers:Disabled
|
| 96 |
+
[01/20/2026-06:51:04] [I] Dump output: Disabled
|
| 97 |
+
[01/20/2026-06:51:04] [I] Profile: Disabled
|
| 98 |
+
[01/20/2026-06:51:04] [I] Export timing to JSON file:
|
| 99 |
+
[01/20/2026-06:51:04] [I] Export output to JSON file:
|
| 100 |
+
[01/20/2026-06:51:04] [I] Export profile to JSON file:
|
| 101 |
+
[01/20/2026-06:51:04] [I]
|
| 102 |
+
[01/20/2026-06:51:04] [I] === Device Information ===
|
| 103 |
+
[01/20/2026-06:51:04] [I] Available Devices:
|
| 104 |
+
[01/20/2026-06:51:04] [I] Device 0: "NVIDIA GeForce RTX 4090" UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 105 |
+
[01/20/2026-06:51:04] [I] Selected Device: NVIDIA GeForce RTX 4090
|
| 106 |
+
[01/20/2026-06:51:04] [I] Selected Device ID: 0
|
| 107 |
+
[01/20/2026-06:51:04] [I] Selected Device UUID: GPU-55c23db9-433c-0d6c-46e7-9387266e5ddb
|
| 108 |
+
[01/20/2026-06:51:04] [I] Compute Capability: 8.9
|
| 109 |
+
[01/20/2026-06:51:04] [I] SMs: 128
|
| 110 |
+
[01/20/2026-06:51:04] [I] Device Global Memory: 24071 MiB
|
| 111 |
+
[01/20/2026-06:51:04] [I] Shared Memory per SM: 100 KiB
|
| 112 |
+
[01/20/2026-06:51:04] [I] Memory Bus Width: 384 bits (ECC disabled)
|
| 113 |
+
[01/20/2026-06:51:04] [I] Application Compute Clock Rate: 2.52 GHz
|
| 114 |
+
[01/20/2026-06:51:04] [I] Application Memory Clock Rate: 10.501 GHz
|
| 115 |
+
[01/20/2026-06:51:04] [I]
|
| 116 |
+
[01/20/2026-06:51:04] [I] Note: The application clock rates do not reflect the actual clock rates that the GPU is currently running at.
|
| 117 |
+
[01/20/2026-06:51:04] [I]
|
| 118 |
+
[01/20/2026-06:51:04] [I] TensorRT version: 10.14.1
|
| 119 |
+
[01/20/2026-06:51:04] [I] Loading standard plugins
|
| 120 |
+
[01/20/2026-06:51:04] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 29, GPU 403 (MiB)
|
| 121 |
+
[01/20/2026-06:51:04] [I] Start parsing network model.
|
| 122 |
+
[01/20/2026-06:51:04] [I] [TRT] ----------------------------------------------------------------
|
| 123 |
+
[01/20/2026-06:51:04] [I] [TRT] Input filename: checkpoints/deimv2_hgnetv2_pico_coco.onnx
|
| 124 |
+
[01/20/2026-06:51:04] [I] [TRT] ONNX IR version: 0.0.8
|
| 125 |
+
[01/20/2026-06:51:04] [I] [TRT] Opset version: 17
|
| 126 |
+
[01/20/2026-06:51:04] [I] [TRT] Producer name: pytorch
|
| 127 |
+
[01/20/2026-06:51:04] [I] [TRT] Producer version: 2.10.0
|
| 128 |
+
[01/20/2026-06:51:04] [I] [TRT] Domain:
|
| 129 |
+
[01/20/2026-06:51:04] [I] [TRT] Model version: 0
|
| 130 |
+
[01/20/2026-06:51:04] [I] [TRT] Doc string:
|
| 131 |
+
[01/20/2026-06:51:04] [I] [TRT] ----------------------------------------------------------------
|
| 132 |
+
[01/20/2026-06:51:04] [W] [TRT] ModelImporter.cpp:661: Make sure input orig_target_sizes has Int64 binding.
|
| 133 |
+
[01/20/2026-06:51:04] [W] [TRT] ModelImporter.cpp:908: Make sure output labels has Int64 binding.
|
| 134 |
+
[01/20/2026-06:51:04] [I] Finished parsing network model. Parse time: 0.0253585
|
| 135 |
+
[01/20/2026-06:51:04] [I] Set shape of input tensor images for optimization profile 0 to: MIN=1x3x640x640 OPT=1x3x640x640 MAX=1x3x640x640
|
| 136 |
+
[01/20/2026-06:51:04] [I] Set shape of input tensor orig_target_sizes for optimization profile 0 to: MIN=1x2 OPT=1x2 MAX=1x2
|
| 137 |
+
[01/20/2026-06:51:04] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +205, GPU +4, now: CPU 517, GPU 407 (MiB)
|
| 138 |
+
[01/20/2026-06:51:04] [W] [TRT] Detected layernorm nodes in FP16.
|
| 139 |
+
[01/20/2026-06:51:04] [W] [TRT] Running layernorm after self-attention with FP16 Reduce or Pow may cause overflow. Forcing Reduce or Pow Layers in FP32 precision, or exporting the model to use INormalizationLayer (available with ONNX opset >= 17) can help preserving accuracy.
|
| 140 |
+
[01/20/2026-06:51:04] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
|
| 141 |
+
[01/20/2026-06:52:29] [I] [TRT] Compiler backend is used during engine build.
|
| 142 |
+
[01/20/2026-06:52:59] [I] [TRT] Detected 2 inputs and 3 output network tensors.
|
| 143 |
+
[01/20/2026-06:52:59] [I] [TRT] Total Host Persistent Memory: 360640 bytes
|
| 144 |
+
[01/20/2026-06:52:59] [I] [TRT] Total Device Persistent Memory: 0 bytes
|
| 145 |
+
[01/20/2026-06:52:59] [I] [TRT] Max Scratch Memory: 1573376 bytes
|
| 146 |
+
[01/20/2026-06:52:59] [I] [TRT] [BlockAssignment] Started assigning block shifts. This will take 97 steps to complete.
|
| 147 |
+
[01/20/2026-06:52:59] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 2.20296ms to assign 10 blocks to 97 nodes requiring 12479488 bytes.
|
| 148 |
+
[01/20/2026-06:52:59] [I] [TRT] Total Activation Memory: 12479488 bytes
|
| 149 |
+
[01/20/2026-06:52:59] [I] [TRT] Total Weights Memory: 3151328 bytes
|
| 150 |
+
[01/20/2026-06:53:00] [I] [TRT] Compiler backend is used during engine execution.
|
| 151 |
+
[01/20/2026-06:53:00] [I] [TRT] Engine generation completed in 115.6 seconds.
|
| 152 |
+
[01/20/2026-06:53:00] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 1 MiB, GPU 43 MiB
|
| 153 |
+
[01/20/2026-06:53:00] [I] Created engine with size: 5.96109 MiB
|
| 154 |
+
[01/20/2026-06:53:00] [I] Engine built in 115.835 sec.
|
| 155 |
+
[01/20/2026-06:53:00] [I] [TRT] Loaded engine size: 5 MiB
|
| 156 |
+
[01/20/2026-06:53:00] [I] Engine deserialized in 0.0109609 sec.
|
| 157 |
+
[01/20/2026-06:53:00] [I] [TRT] [MS] Running engine with multi stream info
|
| 158 |
+
[01/20/2026-06:53:00] [I] [TRT] [MS] Number of aux streams is 2
|
| 159 |
+
[01/20/2026-06:53:00] [I] [TRT] [MS] Number of total worker streams is 3
|
| 160 |
+
[01/20/2026-06:53:00] [I] [TRT] [MS] The main stream provided by execute/enqueue calls is the first worker stream
|
| 161 |
+
[01/20/2026-06:53:00] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +11, now: CPU 0, GPU 14 (MiB)
|
| 162 |
+
[01/20/2026-06:53:00] [I] Setting persistentCacheLimit to 0 bytes.
|
| 163 |
+
[01/20/2026-06:53:00] [I] Created execution context with device memory size: 11.9014 MiB
|
| 164 |
+
[01/20/2026-06:53:00] [I] Using random values for input images
|
| 165 |
+
[01/20/2026-06:53:00] [I] Input binding for images with dimensions 1x3x640x640 is created.
|
| 166 |
+
[01/20/2026-06:53:00] [I] Using random values for input orig_target_sizes
|
| 167 |
+
[01/20/2026-06:53:00] [I] Input binding for orig_target_sizes with dimensions 1x2 is created.
|
| 168 |
+
[01/20/2026-06:53:00] [I] Output binding for labels with dimensions 1x300 is created.
|
| 169 |
+
[01/20/2026-06:53:00] [I] Output binding for boxes with dimensions 1x300x4 is created.
|
| 170 |
+
[01/20/2026-06:53:00] [I] Output binding for scores with dimensions 1x300 is created.
|
| 171 |
+
[01/20/2026-06:53:00] [I] Starting inference
|
| 172 |
+
[01/20/2026-06:53:03] [I] Warmup completed 325 queries over 200 ms
|
| 173 |
+
[01/20/2026-06:53:03] [I] Timing trace has 4901 queries over 3.00189 s
|
| 174 |
+
[01/20/2026-06:53:03] [I]
|
| 175 |
+
[01/20/2026-06:53:03] [I] === Trace details ===
|
| 176 |
+
[01/20/2026-06:53:03] [I] Trace averages of 10 runs:
|
| 177 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610017 ms - Host latency: 0.839949 ms (enqueue 0.271513 ms)
|
| 178 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610837 ms - Host latency: 0.839828 ms (enqueue 0.269579 ms)
|
| 179 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610509 ms - Host latency: 0.840181 ms (enqueue 0.26765 ms)
|
| 180 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610098 ms - Host latency: 0.839499 ms (enqueue 0.272404 ms)
|
| 181 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609996 ms - Host latency: 0.840347 ms (enqueue 0.269255 ms)
|
| 182 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610919 ms - Host latency: 0.842126 ms (enqueue 0.271313 ms)
|
| 183 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610535 ms - Host latency: 0.84382 ms (enqueue 0.270197 ms)
|
| 184 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610725 ms - Host latency: 0.844295 ms (enqueue 0.27135 ms)
|
| 185 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611229 ms - Host latency: 0.842973 ms (enqueue 0.274615 ms)
|
| 186 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61033 ms - Host latency: 0.840291 ms (enqueue 0.268875 ms)
|
| 187 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609824 ms - Host latency: 0.838998 ms (enqueue 0.266461 ms)
|
| 188 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610217 ms - Host latency: 0.839285 ms (enqueue 0.281149 ms)
|
| 189 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610397 ms - Host latency: 0.84021 ms (enqueue 0.266776 ms)
|
| 190 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610529 ms - Host latency: 0.83905 ms (enqueue 0.265906 ms)
|
| 191 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610645 ms - Host latency: 0.839963 ms (enqueue 0.269089 ms)
|
| 192 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610513 ms - Host latency: 0.841379 ms (enqueue 0.276239 ms)
|
| 193 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610407 ms - Host latency: 0.842798 ms (enqueue 0.269412 ms)
|
| 194 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610614 ms - Host latency: 0.841885 ms (enqueue 0.267557 ms)
|
| 195 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610715 ms - Host latency: 0.84137 ms (enqueue 0.268875 ms)
|
| 196 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610913 ms - Host latency: 0.843088 ms (enqueue 0.267282 ms)
|
| 197 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6099 ms - Host latency: 0.841846 ms (enqueue 0.267703 ms)
|
| 198 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610916 ms - Host latency: 0.842267 ms (enqueue 0.267447 ms)
|
| 199 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61051 ms - Host latency: 0.838229 ms (enqueue 0.306476 ms)
|
| 200 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610931 ms - Host latency: 0.840082 ms (enqueue 0.263297 ms)
|
| 201 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61051 ms - Host latency: 0.841263 ms (enqueue 0.26236 ms)
|
| 202 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609891 ms - Host latency: 0.841657 ms (enqueue 0.263376 ms)
|
| 203 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.60979 ms - Host latency: 0.840454 ms (enqueue 0.265164 ms)
|
| 204 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610114 ms - Host latency: 0.840308 ms (enqueue 0.263965 ms)
|
| 205 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609921 ms - Host latency: 0.839874 ms (enqueue 0.264322 ms)
|
| 206 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611124 ms - Host latency: 0.841544 ms (enqueue 0.264243 ms)
|
| 207 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610318 ms - Host latency: 0.840457 ms (enqueue 0.266019 ms)
|
| 208 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609912 ms - Host latency: 0.840503 ms (enqueue 0.265372 ms)
|
| 209 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610245 ms - Host latency: 0.840985 ms (enqueue 0.277084 ms)
|
| 210 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610614 ms - Host latency: 0.843039 ms (enqueue 0.269421 ms)
|
| 211 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611374 ms - Host latency: 0.844342 ms (enqueue 0.267914 ms)
|
| 212 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610202 ms - Host latency: 0.839642 ms (enqueue 0.268436 ms)
|
| 213 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610223 ms - Host latency: 0.840256 ms (enqueue 0.268814 ms)
|
| 214 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610306 ms - Host latency: 0.839551 ms (enqueue 0.268353 ms)
|
| 215 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61051 ms - Host latency: 0.84021 ms (enqueue 0.27561 ms)
|
| 216 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.839691 ms (enqueue 0.271695 ms)
|
| 217 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6103 ms - Host latency: 0.839352 ms (enqueue 0.26886 ms)
|
| 218 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610001 ms - Host latency: 0.84028 ms (enqueue 0.268594 ms)
|
| 219 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611023 ms - Host latency: 0.840265 ms (enqueue 0.268597 ms)
|
| 220 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611023 ms - Host latency: 0.843723 ms (enqueue 0.26731 ms)
|
| 221 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610919 ms - Host latency: 0.844116 ms (enqueue 0.266803 ms)
|
| 222 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.843213 ms (enqueue 0.268024 ms)
|
| 223 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610721 ms - Host latency: 0.842834 ms (enqueue 0.269043 ms)
|
| 224 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610117 ms - Host latency: 0.83931 ms (enqueue 0.267264 ms)
|
| 225 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610507 ms - Host latency: 0.839636 ms (enqueue 0.26687 ms)
|
| 226 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.839484 ms (enqueue 0.269543 ms)
|
| 227 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611017 ms - Host latency: 0.840094 ms (enqueue 0.267816 ms)
|
| 228 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610413 ms - Host latency: 0.840204 ms (enqueue 0.266925 ms)
|
| 229 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611011 ms - Host latency: 0.842542 ms (enqueue 0.268414 ms)
|
| 230 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610364 ms - Host latency: 0.840881 ms (enqueue 0.267761 ms)
|
| 231 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610907 ms - Host latency: 0.842725 ms (enqueue 0.268365 ms)
|
| 232 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610809 ms - Host latency: 0.841303 ms (enqueue 0.279889 ms)
|
| 233 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610211 ms - Host latency: 0.838586 ms (enqueue 0.278998 ms)
|
| 234 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.842017 ms (enqueue 0.273444 ms)
|
| 235 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611121 ms - Host latency: 0.842773 ms (enqueue 0.27243 ms)
|
| 236 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610828 ms - Host latency: 0.842346 ms (enqueue 0.271936 ms)
|
| 237 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610413 ms - Host latency: 0.839496 ms (enqueue 0.276007 ms)
|
| 238 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610669 ms - Host latency: 0.839801 ms (enqueue 0.274835 ms)
|
| 239 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610754 ms - Host latency: 0.841241 ms (enqueue 0.280914 ms)
|
| 240 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610437 ms - Host latency: 0.840558 ms (enqueue 0.294635 ms)
|
| 241 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610339 ms - Host latency: 0.840021 ms (enqueue 0.26449 ms)
|
| 242 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.838898 ms (enqueue 0.269666 ms)
|
| 243 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610242 ms - Host latency: 0.83786 ms (enqueue 0.274219 ms)
|
| 244 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610291 ms - Host latency: 0.840387 ms (enqueue 0.274866 ms)
|
| 245 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610608 ms - Host latency: 0.841443 ms (enqueue 0.279211 ms)
|
| 246 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610443 ms - Host latency: 0.839301 ms (enqueue 0.267194 ms)
|
| 247 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610919 ms - Host latency: 0.842664 ms (enqueue 0.266315 ms)
|
| 248 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611224 ms - Host latency: 0.846033 ms (enqueue 0.26416 ms)
|
| 249 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611127 ms - Host latency: 0.843726 ms (enqueue 0.262158 ms)
|
| 250 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610394 ms - Host latency: 0.839636 ms (enqueue 0.262982 ms)
|
| 251 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610315 ms - Host latency: 0.840332 ms (enqueue 0.26734 ms)
|
| 252 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611035 ms - Host latency: 0.840308 ms (enqueue 0.262952 ms)
|
| 253 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611047 ms - Host latency: 0.840961 ms (enqueue 0.266699 ms)
|
| 254 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610724 ms - Host latency: 0.840253 ms (enqueue 0.268207 ms)
|
| 255 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610327 ms - Host latency: 0.839795 ms (enqueue 0.266382 ms)
|
| 256 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611041 ms - Host latency: 0.840588 ms (enqueue 0.266467 ms)
|
| 257 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611127 ms - Host latency: 0.843909 ms (enqueue 0.267889 ms)
|
| 258 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61153 ms - Host latency: 0.843707 ms (enqueue 0.266235 ms)
|
| 259 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610925 ms - Host latency: 0.842651 ms (enqueue 0.266803 ms)
|
| 260 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611157 ms - Host latency: 0.843243 ms (enqueue 0.265833 ms)
|
| 261 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610901 ms - Host latency: 0.842596 ms (enqueue 0.276019 ms)
|
| 262 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61051 ms - Host latency: 0.840997 ms (enqueue 0.280133 ms)
|
| 263 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610919 ms - Host latency: 0.83916 ms (enqueue 0.275739 ms)
|
| 264 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610956 ms - Host latency: 0.839343 ms (enqueue 0.273511 ms)
|
| 265 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610089 ms - Host latency: 0.838397 ms (enqueue 0.276715 ms)
|
| 266 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610608 ms - Host latency: 0.841327 ms (enqueue 0.273163 ms)
|
| 267 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610339 ms - Host latency: 0.84176 ms (enqueue 0.273248 ms)
|
| 268 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6099 ms - Host latency: 0.840082 ms (enqueue 0.276697 ms)
|
| 269 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611066 ms - Host latency: 0.842468 ms (enqueue 0.273657 ms)
|
| 270 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610803 ms - Host latency: 0.84173 ms (enqueue 0.272455 ms)
|
| 271 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610425 ms - Host latency: 0.840057 ms (enqueue 0.273499 ms)
|
| 272 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610724 ms - Host latency: 0.839661 ms (enqueue 0.276593 ms)
|
| 273 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610834 ms - Host latency: 0.840137 ms (enqueue 0.274701 ms)
|
| 274 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610181 ms - Host latency: 0.841089 ms (enqueue 0.273004 ms)
|
| 275 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610785 ms - Host latency: 0.842346 ms (enqueue 0.27337 ms)
|
| 276 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610101 ms - Host latency: 0.840485 ms (enqueue 0.275836 ms)
|
| 277 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609827 ms - Host latency: 0.841125 ms (enqueue 0.282294 ms)
|
| 278 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610004 ms - Host latency: 0.839209 ms (enqueue 0.264563 ms)
|
| 279 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610608 ms - Host latency: 0.840552 ms (enqueue 0.263129 ms)
|
| 280 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610602 ms - Host latency: 0.839355 ms (enqueue 0.265387 ms)
|
| 281 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610449 ms - Host latency: 0.839117 ms (enqueue 0.28114 ms)
|
| 282 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61084 ms - Host latency: 0.841345 ms (enqueue 0.29455 ms)
|
| 283 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.840839 ms (enqueue 0.280859 ms)
|
| 284 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609924 ms - Host latency: 0.840149 ms (enqueue 0.283215 ms)
|
| 285 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611011 ms - Host latency: 0.84527 ms (enqueue 0.271826 ms)
|
| 286 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610974 ms - Host latency: 0.844922 ms (enqueue 0.271936 ms)
|
| 287 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610846 ms - Host latency: 0.843536 ms (enqueue 0.267419 ms)
|
| 288 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610321 ms - Host latency: 0.840308 ms (enqueue 0.26767 ms)
|
| 289 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610608 ms - Host latency: 0.839838 ms (enqueue 0.267047 ms)
|
| 290 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610413 ms - Host latency: 0.83996 ms (enqueue 0.267084 ms)
|
| 291 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610443 ms - Host latency: 0.839862 ms (enqueue 0.266284 ms)
|
| 292 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610614 ms - Host latency: 0.840576 ms (enqueue 0.266522 ms)
|
| 293 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609705 ms - Host latency: 0.837933 ms (enqueue 0.281317 ms)
|
| 294 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610101 ms - Host latency: 0.840796 ms (enqueue 0.266113 ms)
|
| 295 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61123 ms - Host latency: 0.843939 ms (enqueue 0.265582 ms)
|
| 296 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61051 ms - Host latency: 0.841571 ms (enqueue 0.266559 ms)
|
| 297 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611639 ms - Host latency: 0.842554 ms (enqueue 0.266095 ms)
|
| 298 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611017 ms - Host latency: 0.842853 ms (enqueue 0.265875 ms)
|
| 299 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610144 ms - Host latency: 0.842212 ms (enqueue 0.265833 ms)
|
| 300 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610217 ms - Host latency: 0.840704 ms (enqueue 0.278912 ms)
|
| 301 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610193 ms - Host latency: 0.840271 ms (enqueue 0.269995 ms)
|
| 302 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610059 ms - Host latency: 0.839514 ms (enqueue 0.266388 ms)
|
| 303 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.840009 ms (enqueue 0.266199 ms)
|
| 304 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610724 ms - Host latency: 0.842047 ms (enqueue 0.266852 ms)
|
| 305 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610608 ms - Host latency: 0.841412 ms (enqueue 0.268854 ms)
|
| 306 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610516 ms - Host latency: 0.840967 ms (enqueue 0.275183 ms)
|
| 307 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609729 ms - Host latency: 0.839624 ms (enqueue 0.266852 ms)
|
| 308 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610748 ms - Host latency: 0.84137 ms (enqueue 0.268671 ms)
|
| 309 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611121 ms - Host latency: 0.841913 ms (enqueue 0.270264 ms)
|
| 310 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609698 ms - Host latency: 0.837726 ms (enqueue 0.267316 ms)
|
| 311 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610321 ms - Host latency: 0.841772 ms (enqueue 0.266486 ms)
|
| 312 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610657 ms - Host latency: 0.842078 ms (enqueue 0.267517 ms)
|
| 313 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611292 ms - Host latency: 0.844556 ms (enqueue 0.273547 ms)
|
| 314 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611035 ms - Host latency: 0.842432 ms (enqueue 0.268201 ms)
|
| 315 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.839539 ms (enqueue 0.268005 ms)
|
| 316 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610449 ms - Host latency: 0.839893 ms (enqueue 0.271875 ms)
|
| 317 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610828 ms - Host latency: 0.840002 ms (enqueue 0.267957 ms)
|
| 318 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610339 ms - Host latency: 0.839648 ms (enqueue 0.269507 ms)
|
| 319 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610071 ms - Host latency: 0.839709 ms (enqueue 0.267639 ms)
|
| 320 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610181 ms - Host latency: 0.838477 ms (enqueue 0.267578 ms)
|
| 321 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610242 ms - Host latency: 0.84071 ms (enqueue 0.267102 ms)
|
| 322 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.842322 ms (enqueue 0.271399 ms)
|
| 323 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611035 ms - Host latency: 0.842822 ms (enqueue 0.27312 ms)
|
| 324 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.844202 ms (enqueue 0.273242 ms)
|
| 325 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610937 ms - Host latency: 0.844104 ms (enqueue 0.288928 ms)
|
| 326 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610925 ms - Host latency: 0.842273 ms (enqueue 0.275781 ms)
|
| 327 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.838354 ms (enqueue 0.28197 ms)
|
| 328 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611121 ms - Host latency: 0.838245 ms (enqueue 0.27771 ms)
|
| 329 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610901 ms - Host latency: 0.84032 ms (enqueue 0.274231 ms)
|
| 330 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.60979 ms - Host latency: 0.838623 ms (enqueue 0.274426 ms)
|
| 331 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610144 ms - Host latency: 0.840906 ms (enqueue 0.272253 ms)
|
| 332 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610937 ms - Host latency: 0.842029 ms (enqueue 0.26676 ms)
|
| 333 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610681 ms - Host latency: 0.842297 ms (enqueue 0.266504 ms)
|
| 334 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611023 ms - Host latency: 0.842004 ms (enqueue 0.269336 ms)
|
| 335 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610388 ms - Host latency: 0.841016 ms (enqueue 0.272339 ms)
|
| 336 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610608 ms - Host latency: 0.839929 ms (enqueue 0.270142 ms)
|
| 337 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610852 ms - Host latency: 0.843384 ms (enqueue 0.281396 ms)
|
| 338 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610571 ms - Host latency: 0.841382 ms (enqueue 0.272815 ms)
|
| 339 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610315 ms - Host latency: 0.83866 ms (enqueue 0.271411 ms)
|
| 340 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610132 ms - Host latency: 0.838647 ms (enqueue 0.270825 ms)
|
| 341 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.840588 ms (enqueue 0.269043 ms)
|
| 342 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610071 ms - Host latency: 0.841406 ms (enqueue 0.280847 ms)
|
| 343 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610437 ms - Host latency: 0.840002 ms (enqueue 0.267139 ms)
|
| 344 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610303 ms - Host latency: 0.840247 ms (enqueue 0.267566 ms)
|
| 345 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609998 ms - Host latency: 0.839221 ms (enqueue 0.266748 ms)
|
| 346 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.840747 ms (enqueue 0.26853 ms)
|
| 347 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610498 ms - Host latency: 0.840942 ms (enqueue 0.267749 ms)
|
| 348 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.839294 ms (enqueue 0.269092 ms)
|
| 349 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61095 ms - Host latency: 0.842065 ms (enqueue 0.271191 ms)
|
| 350 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610852 ms - Host latency: 0.844727 ms (enqueue 0.268457 ms)
|
| 351 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61123 ms - Host latency: 0.843921 ms (enqueue 0.267285 ms)
|
| 352 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61084 ms - Host latency: 0.842358 ms (enqueue 0.268091 ms)
|
| 353 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609973 ms - Host latency: 0.839844 ms (enqueue 0.268396 ms)
|
| 354 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609985 ms - Host latency: 0.83927 ms (enqueue 0.267456 ms)
|
| 355 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61001 ms - Host latency: 0.838428 ms (enqueue 0.267163 ms)
|
| 356 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609802 ms - Host latency: 0.837842 ms (enqueue 0.266895 ms)
|
| 357 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.612134 ms - Host latency: 0.839697 ms (enqueue 0.314233 ms)
|
| 358 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610754 ms - Host latency: 0.83988 ms (enqueue 0.269788 ms)
|
| 359 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610449 ms - Host latency: 0.841846 ms (enqueue 0.266504 ms)
|
| 360 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610754 ms - Host latency: 0.843774 ms (enqueue 0.266968 ms)
|
| 361 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610693 ms - Host latency: 0.842249 ms (enqueue 0.266162 ms)
|
| 362 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611108 ms - Host latency: 0.843762 ms (enqueue 0.266431 ms)
|
| 363 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.843628 ms (enqueue 0.266309 ms)
|
| 364 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611011 ms - Host latency: 0.842346 ms (enqueue 0.266797 ms)
|
| 365 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.840247 ms (enqueue 0.266833 ms)
|
| 366 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.839685 ms (enqueue 0.266785 ms)
|
| 367 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610632 ms - Host latency: 0.839661 ms (enqueue 0.266089 ms)
|
| 368 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.839832 ms (enqueue 0.266992 ms)
|
| 369 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.842126 ms (enqueue 0.270129 ms)
|
| 370 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610999 ms - Host latency: 0.842371 ms (enqueue 0.26897 ms)
|
| 371 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610291 ms - Host latency: 0.840308 ms (enqueue 0.266943 ms)
|
| 372 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610547 ms - Host latency: 0.841394 ms (enqueue 0.267224 ms)
|
| 373 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.841187 ms (enqueue 0.267249 ms)
|
| 374 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610498 ms - Host latency: 0.840418 ms (enqueue 0.267493 ms)
|
| 375 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610669 ms - Host latency: 0.841772 ms (enqueue 0.266162 ms)
|
| 376 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610803 ms - Host latency: 0.842798 ms (enqueue 0.266357 ms)
|
| 377 |
+
[01/20/2026-06:53:03] [I] ... Omitting 901 lines
|
| 378 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610925 ms - Host latency: 0.841382 ms (enqueue 0.268164 ms)
|
| 379 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610754 ms - Host latency: 0.839661 ms (enqueue 0.271851 ms)
|
| 380 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.839661 ms (enqueue 0.269092 ms)
|
| 381 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.839758 ms (enqueue 0.269727 ms)
|
| 382 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610413 ms - Host latency: 0.840808 ms (enqueue 0.267786 ms)
|
| 383 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610645 ms - Host latency: 0.842505 ms (enqueue 0.269287 ms)
|
| 384 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610022 ms - Host latency: 0.840356 ms (enqueue 0.27135 ms)
|
| 385 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610901 ms - Host latency: 0.841882 ms (enqueue 0.268079 ms)
|
| 386 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.841077 ms (enqueue 0.271912 ms)
|
| 387 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610413 ms - Host latency: 0.840149 ms (enqueue 0.293909 ms)
|
| 388 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610083 ms - Host latency: 0.841626 ms (enqueue 0.272107 ms)
|
| 389 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.84231 ms (enqueue 0.268506 ms)
|
| 390 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610767 ms - Host latency: 0.842163 ms (enqueue 0.266846 ms)
|
| 391 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.84126 ms (enqueue 0.267407 ms)
|
| 392 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610669 ms - Host latency: 0.842358 ms (enqueue 0.266553 ms)
|
| 393 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611011 ms - Host latency: 0.842529 ms (enqueue 0.270117 ms)
|
| 394 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609814 ms - Host latency: 0.839502 ms (enqueue 0.284375 ms)
|
| 395 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610229 ms - Host latency: 0.839307 ms (enqueue 0.267773 ms)
|
| 396 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61001 ms - Host latency: 0.839185 ms (enqueue 0.266382 ms)
|
| 397 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610962 ms - Host latency: 0.840479 ms (enqueue 0.266675 ms)
|
| 398 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611353 ms - Host latency: 0.840698 ms (enqueue 0.266357 ms)
|
| 399 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.840527 ms (enqueue 0.265991 ms)
|
| 400 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.840845 ms (enqueue 0.266162 ms)
|
| 401 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611157 ms - Host latency: 0.845703 ms (enqueue 0.266089 ms)
|
| 402 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610449 ms - Host latency: 0.844385 ms (enqueue 0.2677 ms)
|
| 403 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.842651 ms (enqueue 0.26792 ms)
|
| 404 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610376 ms - Host latency: 0.840625 ms (enqueue 0.267627 ms)
|
| 405 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610156 ms - Host latency: 0.839648 ms (enqueue 0.266309 ms)
|
| 406 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.837549 ms (enqueue 0.279468 ms)
|
| 407 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610327 ms - Host latency: 0.839941 ms (enqueue 0.266821 ms)
|
| 408 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610425 ms - Host latency: 0.839478 ms (enqueue 0.265601 ms)
|
| 409 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610229 ms - Host latency: 0.839575 ms (enqueue 0.26521 ms)
|
| 410 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.842358 ms (enqueue 0.268726 ms)
|
| 411 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610937 ms - Host latency: 0.843262 ms (enqueue 0.266357 ms)
|
| 412 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.843066 ms (enqueue 0.26709 ms)
|
| 413 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610742 ms - Host latency: 0.840674 ms (enqueue 0.267676 ms)
|
| 414 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610547 ms - Host latency: 0.840918 ms (enqueue 0.266992 ms)
|
| 415 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.843359 ms (enqueue 0.274756 ms)
|
| 416 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610498 ms - Host latency: 0.842041 ms (enqueue 0.267505 ms)
|
| 417 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.839233 ms (enqueue 0.266602 ms)
|
| 418 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610034 ms - Host latency: 0.839185 ms (enqueue 0.265015 ms)
|
| 419 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610376 ms - Host latency: 0.84021 ms (enqueue 0.272925 ms)
|
| 420 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.842505 ms (enqueue 0.268188 ms)
|
| 421 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.841089 ms (enqueue 0.276025 ms)
|
| 422 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.840454 ms (enqueue 0.276343 ms)
|
| 423 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.840308 ms (enqueue 0.276514 ms)
|
| 424 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61084 ms - Host latency: 0.840649 ms (enqueue 0.272778 ms)
|
| 425 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611035 ms - Host latency: 0.841748 ms (enqueue 0.26731 ms)
|
| 426 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610815 ms - Host latency: 0.840015 ms (enqueue 0.266919 ms)
|
| 427 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610596 ms - Host latency: 0.842383 ms (enqueue 0.265796 ms)
|
| 428 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610376 ms - Host latency: 0.841602 ms (enqueue 0.267944 ms)
|
| 429 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611304 ms - Host latency: 0.841772 ms (enqueue 0.304175 ms)
|
| 430 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.842627 ms (enqueue 0.26582 ms)
|
| 431 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610278 ms - Host latency: 0.839404 ms (enqueue 0.264209 ms)
|
| 432 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610669 ms - Host latency: 0.840186 ms (enqueue 0.262109 ms)
|
| 433 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610303 ms - Host latency: 0.840137 ms (enqueue 0.263403 ms)
|
| 434 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610645 ms - Host latency: 0.840015 ms (enqueue 0.264893 ms)
|
| 435 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610767 ms - Host latency: 0.840234 ms (enqueue 0.266797 ms)
|
| 436 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.838354 ms (enqueue 0.273779 ms)
|
| 437 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.840552 ms (enqueue 0.270972 ms)
|
| 438 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.842554 ms (enqueue 0.266333 ms)
|
| 439 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610864 ms - Host latency: 0.843774 ms (enqueue 0.267236 ms)
|
| 440 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611133 ms - Host latency: 0.844556 ms (enqueue 0.268506 ms)
|
| 441 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610571 ms - Host latency: 0.840967 ms (enqueue 0.266895 ms)
|
| 442 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610767 ms - Host latency: 0.841455 ms (enqueue 0.266699 ms)
|
| 443 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610376 ms - Host latency: 0.839844 ms (enqueue 0.266064 ms)
|
| 444 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.839868 ms (enqueue 0.266235 ms)
|
| 445 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.839307 ms (enqueue 0.266504 ms)
|
| 446 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610278 ms - Host latency: 0.839258 ms (enqueue 0.266187 ms)
|
| 447 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.840942 ms (enqueue 0.267578 ms)
|
| 448 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.841943 ms (enqueue 0.266211 ms)
|
| 449 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611011 ms - Host latency: 0.84292 ms (enqueue 0.267017 ms)
|
| 450 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611084 ms - Host latency: 0.842358 ms (enqueue 0.269385 ms)
|
| 451 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610767 ms - Host latency: 0.840771 ms (enqueue 0.265771 ms)
|
| 452 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611157 ms - Host latency: 0.842749 ms (enqueue 0.265894 ms)
|
| 453 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611304 ms - Host latency: 0.843823 ms (enqueue 0.269141 ms)
|
| 454 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.841724 ms (enqueue 0.265698 ms)
|
| 455 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610693 ms - Host latency: 0.840527 ms (enqueue 0.265601 ms)
|
| 456 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609961 ms - Host latency: 0.839966 ms (enqueue 0.265967 ms)
|
| 457 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609863 ms - Host latency: 0.841064 ms (enqueue 0.269922 ms)
|
| 458 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610156 ms - Host latency: 0.841528 ms (enqueue 0.266138 ms)
|
| 459 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61123 ms - Host latency: 0.841504 ms (enqueue 0.266431 ms)
|
| 460 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610278 ms - Host latency: 0.839941 ms (enqueue 0.266187 ms)
|
| 461 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609937 ms - Host latency: 0.83938 ms (enqueue 0.268286 ms)
|
| 462 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610547 ms - Host latency: 0.839868 ms (enqueue 0.267456 ms)
|
| 463 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.839063 ms (enqueue 0.26582 ms)
|
| 464 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610327 ms - Host latency: 0.84021 ms (enqueue 0.264966 ms)
|
| 465 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610596 ms - Host latency: 0.842163 ms (enqueue 0.268579 ms)
|
| 466 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610474 ms - Host latency: 0.845532 ms (enqueue 0.2677 ms)
|
| 467 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611328 ms - Host latency: 0.844727 ms (enqueue 0.266333 ms)
|
| 468 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61084 ms - Host latency: 0.841382 ms (enqueue 0.266846 ms)
|
| 469 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610181 ms - Host latency: 0.840137 ms (enqueue 0.26626 ms)
|
| 470 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.839868 ms (enqueue 0.272412 ms)
|
| 471 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.840234 ms (enqueue 0.268359 ms)
|
| 472 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610767 ms - Host latency: 0.839868 ms (enqueue 0.271997 ms)
|
| 473 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610156 ms - Host latency: 0.839307 ms (enqueue 0.267993 ms)
|
| 474 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610498 ms - Host latency: 0.839819 ms (enqueue 0.266064 ms)
|
| 475 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.842432 ms (enqueue 0.265649 ms)
|
| 476 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611743 ms - Host latency: 0.844922 ms (enqueue 0.265918 ms)
|
| 477 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611182 ms - Host latency: 0.842896 ms (enqueue 0.271313 ms)
|
| 478 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.842505 ms (enqueue 0.267114 ms)
|
| 479 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611084 ms - Host latency: 0.843628 ms (enqueue 0.266187 ms)
|
| 480 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610425 ms - Host latency: 0.841748 ms (enqueue 0.266089 ms)
|
| 481 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610913 ms - Host latency: 0.840381 ms (enqueue 0.26875 ms)
|
| 482 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.838696 ms (enqueue 0.266602 ms)
|
| 483 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.839941 ms (enqueue 0.267163 ms)
|
| 484 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.840796 ms (enqueue 0.2677 ms)
|
| 485 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.842041 ms (enqueue 0.267065 ms)
|
| 486 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61084 ms - Host latency: 0.841919 ms (enqueue 0.265405 ms)
|
| 487 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610962 ms - Host latency: 0.841455 ms (enqueue 0.265527 ms)
|
| 488 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610376 ms - Host latency: 0.841406 ms (enqueue 0.265845 ms)
|
| 489 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610498 ms - Host latency: 0.840698 ms (enqueue 0.267163 ms)
|
| 490 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.840845 ms (enqueue 0.265332 ms)
|
| 491 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61001 ms - Host latency: 0.840552 ms (enqueue 0.266187 ms)
|
| 492 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.842383 ms (enqueue 0.266357 ms)
|
| 493 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.840942 ms (enqueue 0.266357 ms)
|
| 494 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610083 ms - Host latency: 0.841504 ms (enqueue 0.266187 ms)
|
| 495 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610571 ms - Host latency: 0.842017 ms (enqueue 0.26604 ms)
|
| 496 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610303 ms - Host latency: 0.840991 ms (enqueue 0.265698 ms)
|
| 497 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610571 ms - Host latency: 0.839917 ms (enqueue 0.26543 ms)
|
| 498 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610205 ms - Host latency: 0.839478 ms (enqueue 0.265454 ms)
|
| 499 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610059 ms - Host latency: 0.839453 ms (enqueue 0.265723 ms)
|
| 500 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610693 ms - Host latency: 0.840918 ms (enqueue 0.265991 ms)
|
| 501 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610815 ms - Host latency: 0.840747 ms (enqueue 0.265771 ms)
|
| 502 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610132 ms - Host latency: 0.840039 ms (enqueue 0.267139 ms)
|
| 503 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610815 ms - Host latency: 0.843359 ms (enqueue 0.266846 ms)
|
| 504 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61123 ms - Host latency: 0.845312 ms (enqueue 0.266553 ms)
|
| 505 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611255 ms - Host latency: 0.844092 ms (enqueue 0.265796 ms)
|
| 506 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609741 ms - Host latency: 0.840967 ms (enqueue 0.265479 ms)
|
| 507 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610571 ms - Host latency: 0.840234 ms (enqueue 0.266846 ms)
|
| 508 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610889 ms - Host latency: 0.840039 ms (enqueue 0.265405 ms)
|
| 509 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.6104 ms - Host latency: 0.840112 ms (enqueue 0.266382 ms)
|
| 510 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610181 ms - Host latency: 0.83938 ms (enqueue 0.268359 ms)
|
| 511 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61062 ms - Host latency: 0.839624 ms (enqueue 0.266748 ms)
|
| 512 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.841187 ms (enqueue 0.267017 ms)
|
| 513 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610449 ms - Host latency: 0.841992 ms (enqueue 0.266748 ms)
|
| 514 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.842456 ms (enqueue 0.265454 ms)
|
| 515 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611133 ms - Host latency: 0.84248 ms (enqueue 0.268237 ms)
|
| 516 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.842432 ms (enqueue 0.266138 ms)
|
| 517 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609937 ms - Host latency: 0.842188 ms (enqueue 0.266504 ms)
|
| 518 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.842285 ms (enqueue 0.265674 ms)
|
| 519 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610107 ms - Host latency: 0.840039 ms (enqueue 0.266357 ms)
|
| 520 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610156 ms - Host latency: 0.839185 ms (enqueue 0.265771 ms)
|
| 521 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610474 ms - Host latency: 0.839258 ms (enqueue 0.272437 ms)
|
| 522 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609839 ms - Host latency: 0.841016 ms (enqueue 0.266675 ms)
|
| 523 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610669 ms - Host latency: 0.841455 ms (enqueue 0.277979 ms)
|
| 524 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610645 ms - Host latency: 0.840112 ms (enqueue 0.269434 ms)
|
| 525 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609228 ms - Host latency: 0.838159 ms (enqueue 0.266016 ms)
|
| 526 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611011 ms - Host latency: 0.841309 ms (enqueue 0.266064 ms)
|
| 527 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611255 ms - Host latency: 0.84165 ms (enqueue 0.266577 ms)
|
| 528 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.840991 ms (enqueue 0.264941 ms)
|
| 529 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610571 ms - Host latency: 0.842236 ms (enqueue 0.266406 ms)
|
| 530 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.842334 ms (enqueue 0.295825 ms)
|
| 531 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610815 ms - Host latency: 0.843579 ms (enqueue 0.278613 ms)
|
| 532 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610376 ms - Host latency: 0.841992 ms (enqueue 0.266382 ms)
|
| 533 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.841284 ms (enqueue 0.264795 ms)
|
| 534 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.840186 ms (enqueue 0.265625 ms)
|
| 535 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610693 ms - Host latency: 0.840137 ms (enqueue 0.266162 ms)
|
| 536 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610596 ms - Host latency: 0.838647 ms (enqueue 0.26748 ms)
|
| 537 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610962 ms - Host latency: 0.840161 ms (enqueue 0.27002 ms)
|
| 538 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610864 ms - Host latency: 0.839819 ms (enqueue 0.265796 ms)
|
| 539 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.839453 ms (enqueue 0.265283 ms)
|
| 540 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610742 ms - Host latency: 0.842432 ms (enqueue 0.265112 ms)
|
| 541 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610718 ms - Host latency: 0.843628 ms (enqueue 0.264282 ms)
|
| 542 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610522 ms - Host latency: 0.843677 ms (enqueue 0.263843 ms)
|
| 543 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610767 ms - Host latency: 0.841895 ms (enqueue 0.272705 ms)
|
| 544 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610962 ms - Host latency: 0.841504 ms (enqueue 0.2677 ms)
|
| 545 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610181 ms - Host latency: 0.839844 ms (enqueue 0.267944 ms)
|
| 546 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.839624 ms (enqueue 0.267139 ms)
|
| 547 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.839648 ms (enqueue 0.272363 ms)
|
| 548 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610937 ms - Host latency: 0.839014 ms (enqueue 0.268188 ms)
|
| 549 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609961 ms - Host latency: 0.839746 ms (enqueue 0.266602 ms)
|
| 550 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.842139 ms (enqueue 0.26665 ms)
|
| 551 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610889 ms - Host latency: 0.841895 ms (enqueue 0.284204 ms)
|
| 552 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610742 ms - Host latency: 0.841406 ms (enqueue 0.269434 ms)
|
| 553 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.840991 ms (enqueue 0.264795 ms)
|
| 554 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610425 ms - Host latency: 0.840771 ms (enqueue 0.269897 ms)
|
| 555 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610815 ms - Host latency: 0.84231 ms (enqueue 0.265186 ms)
|
| 556 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610498 ms - Host latency: 0.842358 ms (enqueue 0.265454 ms)
|
| 557 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610571 ms - Host latency: 0.839429 ms (enqueue 0.281226 ms)
|
| 558 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.611084 ms - Host latency: 0.840479 ms (enqueue 0.266772 ms)
|
| 559 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610254 ms - Host latency: 0.841016 ms (enqueue 0.272559 ms)
|
| 560 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610767 ms - Host latency: 0.842041 ms (enqueue 0.272827 ms)
|
| 561 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609912 ms - Host latency: 0.840649 ms (enqueue 0.267065 ms)
|
| 562 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610742 ms - Host latency: 0.840625 ms (enqueue 0.266406 ms)
|
| 563 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609863 ms - Host latency: 0.839526 ms (enqueue 0.266382 ms)
|
| 564 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610352 ms - Host latency: 0.84043 ms (enqueue 0.267822 ms)
|
| 565 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610547 ms - Host latency: 0.840308 ms (enqueue 0.267993 ms)
|
| 566 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610278 ms - Host latency: 0.840063 ms (enqueue 0.265601 ms)
|
| 567 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.609863 ms - Host latency: 0.840698 ms (enqueue 0.265356 ms)
|
| 568 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610791 ms - Host latency: 0.844287 ms (enqueue 0.265845 ms)
|
| 569 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610376 ms - Host latency: 0.844653 ms (enqueue 0.272949 ms)
|
| 570 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.61084 ms - Host latency: 0.842627 ms (enqueue 0.290405 ms)
|
| 571 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610547 ms - Host latency: 0.841748 ms (enqueue 0.284546 ms)
|
| 572 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610327 ms - Host latency: 0.839136 ms (enqueue 0.275562 ms)
|
| 573 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610229 ms - Host latency: 0.839502 ms (enqueue 0.273706 ms)
|
| 574 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610913 ms - Host latency: 0.839648 ms (enqueue 0.281372 ms)
|
| 575 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.610278 ms - Host latency: 0.838892 ms (enqueue 0.266992 ms)
|
| 576 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.618945 ms - Host latency: 0.84585 ms (enqueue 0.494409 ms)
|
| 577 |
+
[01/20/2026-06:53:03] [I] Average on 10 runs - GPU latency: 0.623315 ms - Host latency: 0.848169 ms (enqueue 0.545898 ms)
|
| 578 |
+
[01/20/2026-06:53:03] [I]
|
| 579 |
+
[01/20/2026-06:53:03] [I] === Performance summary ===
|
| 580 |
+
[01/20/2026-06:53:03] [I] Throughput: 1632.64 qps
|
| 581 |
+
[01/20/2026-06:53:03] [I] Latency: min = 0.829834 ms, max = 0.857178 ms, mean = 0.841102 ms, median = 0.840942 ms, percentile(90%) = 0.844482 ms, percentile(95%) = 0.845581 ms, percentile(99%) = 0.848145 ms
|
| 582 |
+
[01/20/2026-06:53:03] [I] Enqueue Time: min = 0.259766 ms, max = 0.737549 ms, mean = 0.270267 ms, median = 0.266846 ms, percentile(90%) = 0.27478 ms, percentile(95%) = 0.281372 ms, percentile(99%) = 0.325073 ms
|
| 583 |
+
[01/20/2026-06:53:03] [I] H2D Latency: min = 0.214478 ms, max = 0.234558 ms, mean = 0.225852 ms, median = 0.225494 ms, percentile(90%) = 0.22876 ms, percentile(95%) = 0.229492 ms, percentile(99%) = 0.232178 ms
|
| 584 |
+
[01/20/2026-06:53:03] [I] GPU Compute Time: min = 0.607178 ms, max = 0.630615 ms, mean = 0.610588 ms, median = 0.610352 ms, percentile(90%) = 0.612305 ms, percentile(95%) = 0.612366 ms, percentile(99%) = 0.613403 ms
|
| 585 |
+
[01/20/2026-06:53:03] [I] D2H Latency: min = 0.00390625 ms, max = 0.00830078 ms, mean = 0.00466213 ms, median = 0.0045166 ms, percentile(90%) = 0.00543213 ms, percentile(95%) = 0.0057373 ms, percentile(99%) = 0.00604248 ms
|
| 586 |
+
[01/20/2026-06:53:03] [I] Total Host Walltime: 3.00189 s
|
| 587 |
+
[01/20/2026-06:53:03] [I] Total GPU Compute Time: 2.99249 s
|
| 588 |
+
[01/20/2026-06:53:03] [I] Explanations of the performance metrics are printed in the verbose logs.
|
| 589 |
+
[01/20/2026-06:53:03] [I]
|
| 590 |
+
&&&& PASSED TensorRT.trtexec [TensorRT v101401] [b48] # trtexec --onnx=checkpoints/deimv2_hgnetv2_pico_coco.onnx --saveEngine=checkpoints/deimv2_hgnetv2_pico_coco.engine --fp16 --optShapes=images:1x3x640x640,orig_target_sizes:1x2 --memPoolSize=workspace:4096 --builderOptimizationLevel=3
|
deimv2_hgnetv2_pico_coco.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fa5ef65380abf1991187b81f2b4bae6fe0843e8141827a27acfe5947386d68ab
|
| 3 |
+
size 6374729
|
deimv2_hgnetv2_pico_coco.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8cb69e7cdd4b7fc414831a6e3e99a6c9d721ef9e268bef3ff549b3fcd72231f
|
| 3 |
+
size 6437843
|