Upload folder using huggingface_hub
Browse files- benchmark_logs/[inc-requant-woq-staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_run1.log +29 -0
- benchmark_logs/[inc-requant-woq-staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_serving.log +0 -0
- benchmark_logs/[staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_run1.log +29 -0
- benchmark_logs/[staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_serving.log +0 -0
benchmark_logs/[inc-requant-woq-staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_run1.log
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/448 [00:00<?, ?it/s]
|
| 1 |
0%| | 1/448 [06:10<45:58:53, 370.32s/it]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO 03-24 02:44:45 __init__.py:199] Automatically detected platform hpu.
|
| 2 |
+
Namespace(backend='vllm', base_url=None, host='localhost', port=8080, endpoint='/v1/completions', dataset=None, dataset_name='sonnet', dataset_path='benchmarks/sonnet.txt', max_concurrency=None, model='/data/models/DeepSeek-R1/', tokenizer='/data/models/DeepSeek-R1/', best_of=1, use_beam_search=False, num_prompts=448, logprobs=None, request_rate=inf, burstiness=1.0, seed=0, trust_remote_code=False, disable_tqdm=False, profile=False, save_result=False, metadata=None, result_dir=None, result_filename=None, ignore_eos=False, percentile_metrics='ttft,tpot,itl', metric_percentiles='99', goodput=None, sonnet_input_len=1024, sonnet_output_len=1024, sonnet_prefix_len=100, sharegpt_output_len=None, random_input_len=1024, random_output_len=128, random_range_ratio=1.0, random_prefix_len=0, hf_subset=None, hf_split=None, hf_output_len=None, tokenizer_mode='auto', served_model_name=None, lora_modules=None)
|
| 3 |
+
Starting initial single prompt test run...
|
| 4 |
+
Initial test run completed. Starting main benchmark run...
|
| 5 |
+
Traffic request rate: inf
|
| 6 |
+
Burstiness factor: 1.0 (Poisson process)
|
| 7 |
+
Maximum request concurrency: None
|
| 8 |
+
|
| 9 |
0%| | 0/448 [00:00<?, ?it/s]
|
| 10 |
0%| | 1/448 [06:10<45:58:53, 370.32s/it]
|
| 11 |
+
============ Serving Benchmark Result ============
|
| 12 |
+
Successful requests: 448
|
| 13 |
+
Benchmark duration (s): 370.37
|
| 14 |
+
Total input tokens: 414918
|
| 15 |
+
Total generated tokens: 458752
|
| 16 |
+
Request throughput (req/s): 1.21
|
| 17 |
+
Output token throughput (tok/s): 1238.64
|
| 18 |
+
Total Token throughput (tok/s): 2358.93
|
| 19 |
+
---------------Time to First Token----------------
|
| 20 |
+
Mean TTFT (ms): 16517.07
|
| 21 |
+
Median TTFT (ms): 16477.37
|
| 22 |
+
P99 TTFT (ms): 31681.38
|
| 23 |
+
-----Time per Output Token (excl. 1st token)------
|
| 24 |
+
Mean TPOT (ms): 345.77
|
| 25 |
+
Median TPOT (ms): 345.83
|
| 26 |
+
P99 TPOT (ms): 360.52
|
| 27 |
+
---------------Inter-token Latency----------------
|
| 28 |
+
Mean ITL (ms): 345.77
|
| 29 |
+
Median ITL (ms): 350.61
|
| 30 |
+
P99 ITL (ms): 386.83
|
| 31 |
+
==================================================
|
benchmark_logs/[inc-requant-woq-staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_serving.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
benchmark_logs/[staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_run1.log
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0%| | 0/448 [00:00<?, ?it/s]
|
| 1 |
0%| | 1/448 [03:38<27:11:15, 218.96s/it]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
INFO 03-24 01:45:30 __init__.py:199] Automatically detected platform hpu.
|
| 2 |
+
Namespace(backend='vllm', base_url=None, host='localhost', port=8080, endpoint='/v1/completions', dataset=None, dataset_name='sonnet', dataset_path='benchmarks/sonnet.txt', max_concurrency=None, model='/data/models/DeepSeek-R1-static/', tokenizer='/data/models/DeepSeek-R1-static/', best_of=1, use_beam_search=False, num_prompts=448, logprobs=None, request_rate=inf, burstiness=1.0, seed=0, trust_remote_code=False, disable_tqdm=False, profile=False, save_result=False, metadata=None, result_dir=None, result_filename=None, ignore_eos=False, percentile_metrics='ttft,tpot,itl', metric_percentiles='99', goodput=None, sonnet_input_len=1024, sonnet_output_len=1024, sonnet_prefix_len=100, sharegpt_output_len=None, random_input_len=1024, random_output_len=128, random_range_ratio=1.0, random_prefix_len=0, hf_subset=None, hf_split=None, hf_output_len=None, tokenizer_mode='auto', served_model_name=None, lora_modules=None)
|
| 3 |
+
Starting initial single prompt test run...
|
| 4 |
+
Initial test run completed. Starting main benchmark run...
|
| 5 |
+
Traffic request rate: inf
|
| 6 |
+
Burstiness factor: 1.0 (Poisson process)
|
| 7 |
+
Maximum request concurrency: None
|
| 8 |
+
|
| 9 |
0%| | 0/448 [00:00<?, ?it/s]
|
| 10 |
0%| | 1/448 [03:38<27:11:15, 218.96s/it]
|
| 11 |
+
============ Serving Benchmark Result ============
|
| 12 |
+
Successful requests: 448
|
| 13 |
+
Benchmark duration (s): 219.01
|
| 14 |
+
Total input tokens: 414918
|
| 15 |
+
Total generated tokens: 458752
|
| 16 |
+
Request throughput (req/s): 2.05
|
| 17 |
+
Output token throughput (tok/s): 2094.71
|
| 18 |
+
Total Token throughput (tok/s): 3989.26
|
| 19 |
+
---------------Time to First Token----------------
|
| 20 |
+
Mean TTFT (ms): 16797.56
|
| 21 |
+
Median TTFT (ms): 16810.33
|
| 22 |
+
P99 TTFT (ms): 32200.30
|
| 23 |
+
-----Time per Output Token (excl. 1st token)------
|
| 24 |
+
Mean TPOT (ms): 197.54
|
| 25 |
+
Median TPOT (ms): 197.54
|
| 26 |
+
P99 TPOT (ms): 212.64
|
| 27 |
+
---------------Inter-token Latency----------------
|
| 28 |
+
Mean ITL (ms): 197.54
|
| 29 |
+
Median ITL (ms): 183.74
|
| 30 |
+
P99 ITL (ms): 228.45
|
| 31 |
+
==================================================
|
benchmark_logs/[staticfp8-dmoe-fp8kv-delayedsampling]static-online-gaudi3-0.92util-TPparallel8-EP8-loop1moegroups-multistep1_nprompt448_rrateinf_bs448_i1024_o1024_mdllen2048_serving.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|