| { |
| "title": "Edge Inference Benchmarks", |
| "subtitle": "Compare throughput and latency across devices and model variants.", |
| "model_column": "model", |
| "model_family_column": "model_family", |
| "model_link_prefix": "https://huggingface.co/", |
| "optimized_org": "embedl", |
| "filters": [ |
| { |
| "column": "type", |
| "label": "Modality" |
| }, |
| { |
| "column": "batch", |
| "label": "Batch Size", |
| "type": "number" |
| }, |
| { |
| "column": "device", |
| "label": "Device", |
| "value_labels": { |
| "orin_nano": "Jetson Orin Nano Super", |
| "orin_nano_super": "Jetson Orin Nano Super", |
| "agx_orin": "Jetson AGX Orin", |
| "agx_thor": "Jetson AGX Thor" |
| } |
| } |
| ], |
| "metrics": [ |
| { |
| "column": "tps", |
| "label": "Tokens / sec", |
| "short": "TPS ↑", |
| "higher_is_better": true, |
| "description": "Tokens per second (higher is better). Number of output tokens generated per second during the decoding phase." |
| }, |
| { |
| "column": "tpot", |
| "label": "Time per Output Token (ms)", |
| "short": "TPOT ↓", |
| "higher_is_better": false, |
| "description": "Time per output token in ms (lower is better). Average time (in milliseconds) required to generate one output token during decoding. Computed as TPOT = (last_token_ts - first_token_ts) / total_output_tokens." |
| }, |
| { |
| "column": "ttft", |
| "label": "Time to First Token (ms)", |
| "short": "TTFT ↓", |
| "higher_is_better": false, |
| "description": "Time to first token in ms (lower is better). Time from request submission to generation of the first output token. This includes vision encoding, prompt prefill, and KV cache initialization." |
| }, |
| { |
| "column": "e2e", |
| "label": "End-to-End Latency (sec)", |
| "short": "E2E ↓", |
| "higher_is_better": false, |
| "description": "End-to-end latency in seconds (lower is better). Total time from request submission to completion of the full generated response. This reflects real user-perceived latency." |
| } |
| ], |
| "display_columns": [ |
| { |
| "column": "res", |
| "label": "Resolution", |
| "visible_when": { |
| "type": [ |
| "video", |
| "image" |
| ] |
| } |
| }, |
| { |
| "column": "fps", |
| "label": "FPS", |
| "type": "number", |
| "visible_when": { |
| "type": [ |
| "video" |
| ] |
| } |
| }, |
| { |
| "column": "frames", |
| "label": "Frames", |
| "type": "number", |
| "visible_when": { |
| "type": [ |
| "video" |
| ] |
| } |
| } |
| ], |
| "chart": { |
| "default_metric": "tps", |
| "group_by": "device", |
| "scenarios": [ |
| { |
| "label": "Text", |
| "match": { |
| "type": "text" |
| } |
| }, |
| { |
| "label": "Image · 1280×720", |
| "match": { |
| "type": "image", |
| "res": "1280x720" |
| } |
| }, |
| { |
| "label": "Video · 1280×720 · 4 FPS", |
| "match": { |
| "type": "video", |
| "res": "1280x720", |
| "fps": 4 |
| } |
| } |
| ] |
| }, |
| "table_sort": [ |
| { |
| "column": "res", |
| "direction": "asc" |
| }, |
| { |
| "column": "fps", |
| "direction": "desc" |
| } |
| ], |
| "table_group_by": "model", |
| "model_families": { |
| "Cosmos-Reason2-2B": { |
| "data_file": "data/cosmos-reason2.csv", |
| "table_group_by": ["res", "fps"], |
| "experiment_setup": { |
| "agx_thor": "Measurement setup: NVIDIA vLLM 26.01, 256 tokens generated, 10 warm-up runs, averaged over 25 runs.", |
| "agx_orin": "Measurement setup: NVIDIA vLLM 0.14.0 for Jetson, 256 tokens generated, 10 warm-up runs, averaged over 25 runs.", |
| "orin_nano": "Measurement setup: NVIDIA vLLM 0.14.0 for Jetson, 256 tokens generated, 10 warm-up runs, averaged over 25 runs." |
| } |
| } |
| } |
| } |
|
|