Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- data.pt +3 -0
- hf/added_tokens.json +28 -0
- hf/chat_template.jinja +85 -0
- hf/config.json +34 -0
- hf/generation_config.json +13 -0
- hf/merges.txt +0 -0
- hf/model-00001-of-00014.safetensors +3 -0
- hf/model-00002-of-00014.safetensors +3 -0
- hf/model-00003-of-00014.safetensors +3 -0
- hf/model-00004-of-00014.safetensors +3 -0
- hf/model-00005-of-00014.safetensors +3 -0
- hf/model-00006-of-00014.safetensors +3 -0
- hf/model-00007-of-00014.safetensors +3 -0
- hf/model-00008-of-00014.safetensors +3 -0
- hf/model-00009-of-00014.safetensors +3 -0
- hf/model-00010-of-00014.safetensors +3 -0
- hf/model-00011-of-00014.safetensors +3 -0
- hf/model-00012-of-00014.safetensors +3 -0
- hf/model-00013-of-00014.safetensors +3 -0
- hf/model-00014-of-00014.safetensors +3 -0
- hf/model.safetensors.index.json +714 -0
- hf/special_tokens_map.json +31 -0
- hf/tokenizer.json +3 -0
- hf/tokenizer_config.json +240 -0
- hf/tokenizer_config.json.bak +239 -0
- hf/vocab.json +0 -0
- hf_ip/lb_endpoint.txt +1 -0
- hf_ip/load_balancer.log +1198 -0
- hf_ip/vllm_gpu0.log +312 -0
- hf_ip/vllm_gpu1.log +387 -0
- hf_ip/vllm_gpu2.log +327 -0
- hf_ip/vllm_gpu3.log +280 -0
- hf_ip/vllm_gpu4.log +305 -0
- hf_ip/vllm_gpu5.log +273 -0
- hf_ip/vllm_gpu6.log +283 -0
- hf_ip/vllm_gpu7.log +377 -0
- hf_ip/vllm_instances.txt +232 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c378db65a2203aa8a78056fbcf7fd2d9f96a608110397a3ee932d74213ac7c9
|
| 3 |
+
size 1492
|
hf/added_tokens.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</think>": 151668,
|
| 3 |
+
"</tool_call>": 151658,
|
| 4 |
+
"</tool_response>": 151666,
|
| 5 |
+
"<think>": 151667,
|
| 6 |
+
"<tool_call>": 151657,
|
| 7 |
+
"<tool_response>": 151665,
|
| 8 |
+
"<|box_end|>": 151649,
|
| 9 |
+
"<|box_start|>": 151648,
|
| 10 |
+
"<|endoftext|>": 151643,
|
| 11 |
+
"<|file_sep|>": 151664,
|
| 12 |
+
"<|fim_middle|>": 151660,
|
| 13 |
+
"<|fim_pad|>": 151662,
|
| 14 |
+
"<|fim_prefix|>": 151659,
|
| 15 |
+
"<|fim_suffix|>": 151661,
|
| 16 |
+
"<|im_end|>": 151645,
|
| 17 |
+
"<|im_start|>": 151644,
|
| 18 |
+
"<|image_pad|>": 151655,
|
| 19 |
+
"<|object_ref_end|>": 151647,
|
| 20 |
+
"<|object_ref_start|>": 151646,
|
| 21 |
+
"<|quad_end|>": 151651,
|
| 22 |
+
"<|quad_start|>": 151650,
|
| 23 |
+
"<|repo_name|>": 151663,
|
| 24 |
+
"<|video_pad|>": 151656,
|
| 25 |
+
"<|vision_end|>": 151653,
|
| 26 |
+
"<|vision_pad|>": 151654,
|
| 27 |
+
"<|vision_start|>": 151652
|
| 28 |
+
}
|
hf/chat_template.jinja
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0].role == 'system' %}
|
| 4 |
+
{{- messages[0].content + '\n\n' }}
|
| 5 |
+
{%- endif %}
|
| 6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 7 |
+
{%- for tool in tools %}
|
| 8 |
+
{{- "\n" }}
|
| 9 |
+
{{- tool | tojson }}
|
| 10 |
+
{%- endfor %}
|
| 11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 12 |
+
{%- else %}
|
| 13 |
+
{%- if messages[0].role == 'system' %}
|
| 14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
| 15 |
+
{%- endif %}
|
| 16 |
+
{%- endif %}
|
| 17 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 18 |
+
{%- for message in messages[::-1] %}
|
| 19 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 20 |
+
{%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
| 21 |
+
{%- set ns.multi_step_tool = false %}
|
| 22 |
+
{%- set ns.last_query_index = index %}
|
| 23 |
+
{%- endif %}
|
| 24 |
+
{%- endfor %}
|
| 25 |
+
{%- for message in messages %}
|
| 26 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
| 27 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 28 |
+
{%- elif message.role == "assistant" %}
|
| 29 |
+
{%- set content = message.content %}
|
| 30 |
+
{%- set reasoning_content = '' %}
|
| 31 |
+
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
| 32 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 33 |
+
{%- else %}
|
| 34 |
+
{%- if '</think>' in message.content %}
|
| 35 |
+
{%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
|
| 36 |
+
{%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 37 |
+
{%- endif %}
|
| 38 |
+
{%- endif %}
|
| 39 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 40 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
| 41 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
| 42 |
+
{%- else %}
|
| 43 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 44 |
+
{%- endif %}
|
| 45 |
+
{%- else %}
|
| 46 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 47 |
+
{%- endif %}
|
| 48 |
+
{%- if message.tool_calls %}
|
| 49 |
+
{%- for tool_call in message.tool_calls %}
|
| 50 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
| 51 |
+
{{- '\n' }}
|
| 52 |
+
{%- endif %}
|
| 53 |
+
{%- if tool_call.function %}
|
| 54 |
+
{%- set tool_call = tool_call.function %}
|
| 55 |
+
{%- endif %}
|
| 56 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 57 |
+
{{- tool_call.name }}
|
| 58 |
+
{{- '", "arguments": ' }}
|
| 59 |
+
{%- if tool_call.arguments is string %}
|
| 60 |
+
{{- tool_call.arguments }}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{{- tool_call.arguments | tojson }}
|
| 63 |
+
{%- endif %}
|
| 64 |
+
{{- '}\n</tool_call>' }}
|
| 65 |
+
{%- endfor %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{{- '<|im_end|>\n' }}
|
| 68 |
+
{%- elif message.role == "tool" %}
|
| 69 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 70 |
+
{{- '<|im_start|>user' }}
|
| 71 |
+
{%- endif %}
|
| 72 |
+
{{- '\n<tool_response>\n' }}
|
| 73 |
+
{{- message.content }}
|
| 74 |
+
{{- '\n</tool_response>' }}
|
| 75 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 76 |
+
{{- '<|im_end|>\n' }}
|
| 77 |
+
{%- endif %}
|
| 78 |
+
{%- endif %}
|
| 79 |
+
{%- endfor %}
|
| 80 |
+
{%- if add_generation_prompt %}
|
| 81 |
+
{{- '<|im_start|>assistant\n' }}
|
| 82 |
+
{%- if enable_thinking is defined and enable_thinking is false %}
|
| 83 |
+
{{- '<think>\n\n</think>\n\n' }}
|
| 84 |
+
{%- endif %}
|
| 85 |
+
{%- endif %}
|
hf/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"eos_token_id": 151645,
|
| 8 |
+
"head_dim": 128,
|
| 9 |
+
"hidden_act": "silu",
|
| 10 |
+
"hidden_size": 5120,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 25600,
|
| 13 |
+
"max_position_embeddings": 131072,
|
| 14 |
+
"max_window_layers": 64,
|
| 15 |
+
"model_type": "qwen3",
|
| 16 |
+
"num_attention_heads": 64,
|
| 17 |
+
"num_hidden_layers": 64,
|
| 18 |
+
"num_key_value_heads": 8,
|
| 19 |
+
"pad_token_id": 151643,
|
| 20 |
+
"rms_norm_eps": 1e-06,
|
| 21 |
+
"rope_scaling": {
|
| 22 |
+
"factor": 4.0,
|
| 23 |
+
"original_max_position_embeddings": 32768,
|
| 24 |
+
"rope_type": "yarn"
|
| 25 |
+
},
|
| 26 |
+
"rope_theta": 1000000,
|
| 27 |
+
"sliding_window": null,
|
| 28 |
+
"tie_word_embeddings": false,
|
| 29 |
+
"torch_dtype": "bfloat16",
|
| 30 |
+
"transformers_version": "4.52.4",
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"use_sliding_window": false,
|
| 33 |
+
"vocab_size": 151936
|
| 34 |
+
}
|
hf/generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
151645,
|
| 6 |
+
151643
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151643,
|
| 9 |
+
"temperature": 0.6,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"transformers_version": "4.52.4"
|
| 13 |
+
}
|
hf/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf/model-00001-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30e6f85e21a0e9d3483e8bb9e6010c80a8e797719552329ae795f76a26b52447
|
| 3 |
+
size 4928419424
|
hf/model-00002-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99db49822e9f64fd5502fd0d42e2e5a6fb43df4206fe161cb1c8120e32670637
|
| 3 |
+
size 4781605144
|
hf/model-00003-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e07a88f5427346dfe5224e4b2c81368b18a293ae92b747c1e1a0cd18411032c5
|
| 3 |
+
size 4928450568
|
hf/model-00004-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5888990fba12830a1c24bc6cc470697284f57fa3ff9bc68abb2f27c15ec9ae08
|
| 3 |
+
size 4980813680
|
hf/model-00005-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:493e7423c2354cdc29e88cb6bbd439b9fe0a0c5dc44c9df7f912ef36f3de6997
|
| 3 |
+
size 4991315040
|
hf/model-00006-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:540b426e81ddf3232e20258648997d52ece0532c0d3b88742c0dcd232beedda3
|
| 3 |
+
size 4949367504
|
hf/model-00007-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:39b294addd72f12cb2c04c6ff91856a20f48ffc5e5300508ee3aa261a51b86c0
|
| 3 |
+
size 4854996904
|
hf/model-00008-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a5971c2dc9c302a74844a6a8c0ac3c18776892563d5d07c6d473dbdd2e46d5b
|
| 3 |
+
size 3565289480
|
hf/model-00009-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f948de65d6c0c4da9aa5c7b1b5634c10a92dd422f494c9554ffe06b9a68cd758
|
| 3 |
+
size 4890335312
|
hf/model-00010-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b48a6226ab1fead18741840d0c6c2ff7a713a182210468b5861586b9a05656c7
|
| 3 |
+
size 4785473976
|
hf/model-00011-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:307afed4ca7ac0aa48c4faf376bb3d654e143f2a6d4145b77416d8afc06cca2a
|
| 3 |
+
size 4834025448
|
hf/model-00012-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0533a6fe98af4e719b356da4f5a996defcd6614f66fbd5c3787497ef0f9894e
|
| 3 |
+
size 4792103184
|
hf/model-00013-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47c9088b8ab86b8c928cdfbb9a38e390e0a0ac716285e348e5ed4fcfd0854c70
|
| 3 |
+
size 4949465424
|
hf/model-00014-of-00014.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31d997e89fb92893911b4214b4a51f7c6ed4ab25fccf47488ca5373b964e66e9
|
| 3 |
+
size 3292667304
|
hf/model.safetensors.index.json
ADDED
|
@@ -0,0 +1,714 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"metadata": {
|
| 3 |
+
"total_size": 65524246528
|
| 4 |
+
},
|
| 5 |
+
"weight_map": {
|
| 6 |
+
"lm_head.weight": "model-00009-of-00014.safetensors",
|
| 7 |
+
"model.embed_tokens.weight": "model-00010-of-00014.safetensors",
|
| 8 |
+
"model.layers.0.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 13 |
+
"model.layers.0.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 14 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 15 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 16 |
+
"model.layers.0.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 17 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 18 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 19 |
+
"model.layers.1.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 20 |
+
"model.layers.1.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 21 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 22 |
+
"model.layers.1.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 23 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 24 |
+
"model.layers.1.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 25 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 26 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 27 |
+
"model.layers.1.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 28 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 29 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 30 |
+
"model.layers.10.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 31 |
+
"model.layers.10.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 32 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 33 |
+
"model.layers.10.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 34 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 35 |
+
"model.layers.10.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 36 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 37 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 38 |
+
"model.layers.10.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 39 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 40 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 41 |
+
"model.layers.11.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 42 |
+
"model.layers.11.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 43 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 44 |
+
"model.layers.11.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 45 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 46 |
+
"model.layers.11.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 47 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 48 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00014-of-00014.safetensors",
|
| 49 |
+
"model.layers.11.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 50 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 51 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 52 |
+
"model.layers.12.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 53 |
+
"model.layers.12.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 54 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 55 |
+
"model.layers.12.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 56 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 57 |
+
"model.layers.12.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 58 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 59 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 60 |
+
"model.layers.12.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 61 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 62 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 63 |
+
"model.layers.13.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 64 |
+
"model.layers.13.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 65 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 66 |
+
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 67 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 68 |
+
"model.layers.13.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 69 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00014-of-00014.safetensors",
|
| 70 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 71 |
+
"model.layers.13.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 72 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 73 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 74 |
+
"model.layers.14.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 75 |
+
"model.layers.14.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 76 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 77 |
+
"model.layers.14.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 78 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 79 |
+
"model.layers.14.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 80 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 81 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
|
| 82 |
+
"model.layers.14.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 83 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 84 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 85 |
+
"model.layers.15.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 86 |
+
"model.layers.15.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 87 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 88 |
+
"model.layers.15.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 89 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 90 |
+
"model.layers.15.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 91 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
|
| 92 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 93 |
+
"model.layers.15.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 94 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 95 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 96 |
+
"model.layers.16.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 97 |
+
"model.layers.16.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 98 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 99 |
+
"model.layers.16.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 100 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 101 |
+
"model.layers.16.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 102 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 103 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 104 |
+
"model.layers.16.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 105 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 106 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 107 |
+
"model.layers.17.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 108 |
+
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 109 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 110 |
+
"model.layers.17.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 111 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 112 |
+
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 113 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 114 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 115 |
+
"model.layers.17.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 116 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 117 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 118 |
+
"model.layers.18.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 119 |
+
"model.layers.18.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 120 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 121 |
+
"model.layers.18.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 122 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 123 |
+
"model.layers.18.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 124 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 125 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 126 |
+
"model.layers.18.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 127 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 128 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 129 |
+
"model.layers.19.input_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 130 |
+
"model.layers.19.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 131 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 132 |
+
"model.layers.19.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 133 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 134 |
+
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 135 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 136 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 137 |
+
"model.layers.19.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 138 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 139 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 140 |
+
"model.layers.2.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 141 |
+
"model.layers.2.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 142 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
|
| 143 |
+
"model.layers.2.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 144 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 145 |
+
"model.layers.2.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 146 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 147 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 148 |
+
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 149 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 150 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 151 |
+
"model.layers.20.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 152 |
+
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 153 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 154 |
+
"model.layers.20.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 155 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 156 |
+
"model.layers.20.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 157 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 158 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 159 |
+
"model.layers.20.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 160 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 161 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 162 |
+
"model.layers.21.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 163 |
+
"model.layers.21.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 164 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 165 |
+
"model.layers.21.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 166 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 167 |
+
"model.layers.21.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 168 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 169 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 170 |
+
"model.layers.21.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 171 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 172 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 173 |
+
"model.layers.22.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 174 |
+
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 175 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 176 |
+
"model.layers.22.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 177 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 178 |
+
"model.layers.22.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 179 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 180 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 181 |
+
"model.layers.22.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 182 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 183 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 184 |
+
"model.layers.23.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 185 |
+
"model.layers.23.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 186 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 187 |
+
"model.layers.23.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 188 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 189 |
+
"model.layers.23.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 190 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 191 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 192 |
+
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 193 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 194 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 195 |
+
"model.layers.24.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 196 |
+
"model.layers.24.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 197 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 198 |
+
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 199 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 200 |
+
"model.layers.24.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 201 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 202 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 203 |
+
"model.layers.24.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
|
| 204 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 205 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 206 |
+
"model.layers.25.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 207 |
+
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 208 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 209 |
+
"model.layers.25.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 210 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 211 |
+
"model.layers.25.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 212 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 213 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 214 |
+
"model.layers.25.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 215 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 216 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 217 |
+
"model.layers.26.input_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 218 |
+
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 219 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 220 |
+
"model.layers.26.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
|
| 221 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 222 |
+
"model.layers.26.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 223 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 224 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 225 |
+
"model.layers.26.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 226 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 227 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 228 |
+
"model.layers.27.input_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 229 |
+
"model.layers.27.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 230 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 231 |
+
"model.layers.27.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 232 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 233 |
+
"model.layers.27.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 234 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 235 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00014-of-00014.safetensors",
|
| 236 |
+
"model.layers.27.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 237 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 238 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 239 |
+
"model.layers.28.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 240 |
+
"model.layers.28.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 241 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 242 |
+
"model.layers.28.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
|
| 243 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 244 |
+
"model.layers.28.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 245 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 246 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 247 |
+
"model.layers.28.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 248 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 249 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 250 |
+
"model.layers.29.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 251 |
+
"model.layers.29.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 252 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 253 |
+
"model.layers.29.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 254 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 255 |
+
"model.layers.29.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 256 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 257 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 258 |
+
"model.layers.29.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
|
| 259 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 260 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 261 |
+
"model.layers.3.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 262 |
+
"model.layers.3.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 263 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 264 |
+
"model.layers.3.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 265 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 266 |
+
"model.layers.3.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 267 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 268 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 269 |
+
"model.layers.3.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 270 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
|
| 271 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 272 |
+
"model.layers.30.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 273 |
+
"model.layers.30.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 274 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
|
| 275 |
+
"model.layers.30.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 276 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 277 |
+
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 278 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 279 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 280 |
+
"model.layers.30.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 281 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 282 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 283 |
+
"model.layers.31.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 284 |
+
"model.layers.31.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 285 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 286 |
+
"model.layers.31.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 287 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 288 |
+
"model.layers.31.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 289 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 290 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 291 |
+
"model.layers.31.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 292 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 293 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 294 |
+
"model.layers.32.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 295 |
+
"model.layers.32.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 296 |
+
"model.layers.32.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 297 |
+
"model.layers.32.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 298 |
+
"model.layers.32.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 299 |
+
"model.layers.32.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 300 |
+
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 301 |
+
"model.layers.32.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 302 |
+
"model.layers.32.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 303 |
+
"model.layers.32.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 304 |
+
"model.layers.32.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 305 |
+
"model.layers.33.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 306 |
+
"model.layers.33.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
|
| 307 |
+
"model.layers.33.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 308 |
+
"model.layers.33.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 309 |
+
"model.layers.33.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 310 |
+
"model.layers.33.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 311 |
+
"model.layers.33.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 312 |
+
"model.layers.33.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 313 |
+
"model.layers.33.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 314 |
+
"model.layers.33.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 315 |
+
"model.layers.33.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 316 |
+
"model.layers.34.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 317 |
+
"model.layers.34.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
|
| 318 |
+
"model.layers.34.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 319 |
+
"model.layers.34.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 320 |
+
"model.layers.34.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 321 |
+
"model.layers.34.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 322 |
+
"model.layers.34.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 323 |
+
"model.layers.34.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
|
| 324 |
+
"model.layers.34.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 325 |
+
"model.layers.34.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 326 |
+
"model.layers.34.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
|
| 327 |
+
"model.layers.35.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 328 |
+
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 329 |
+
"model.layers.35.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 330 |
+
"model.layers.35.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 331 |
+
"model.layers.35.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 332 |
+
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 333 |
+
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 334 |
+
"model.layers.35.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 335 |
+
"model.layers.35.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 336 |
+
"model.layers.35.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
|
| 337 |
+
"model.layers.35.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
|
| 338 |
+
"model.layers.36.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 339 |
+
"model.layers.36.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 340 |
+
"model.layers.36.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 341 |
+
"model.layers.36.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 342 |
+
"model.layers.36.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 343 |
+
"model.layers.36.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 344 |
+
"model.layers.36.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 345 |
+
"model.layers.36.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 346 |
+
"model.layers.36.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
|
| 347 |
+
"model.layers.36.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 348 |
+
"model.layers.36.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 349 |
+
"model.layers.37.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 350 |
+
"model.layers.37.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 351 |
+
"model.layers.37.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 352 |
+
"model.layers.37.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 353 |
+
"model.layers.37.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 354 |
+
"model.layers.37.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 355 |
+
"model.layers.37.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 356 |
+
"model.layers.37.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 357 |
+
"model.layers.37.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 358 |
+
"model.layers.37.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 359 |
+
"model.layers.37.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 360 |
+
"model.layers.38.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 361 |
+
"model.layers.38.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 362 |
+
"model.layers.38.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 363 |
+
"model.layers.38.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 364 |
+
"model.layers.38.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 365 |
+
"model.layers.38.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 366 |
+
"model.layers.38.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 367 |
+
"model.layers.38.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 368 |
+
"model.layers.38.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 369 |
+
"model.layers.38.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 370 |
+
"model.layers.38.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 371 |
+
"model.layers.39.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 372 |
+
"model.layers.39.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 373 |
+
"model.layers.39.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 374 |
+
"model.layers.39.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 375 |
+
"model.layers.39.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 376 |
+
"model.layers.39.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 377 |
+
"model.layers.39.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 378 |
+
"model.layers.39.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 379 |
+
"model.layers.39.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 380 |
+
"model.layers.39.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 381 |
+
"model.layers.39.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 382 |
+
"model.layers.4.input_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 383 |
+
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 384 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 385 |
+
"model.layers.4.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
|
| 386 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 387 |
+
"model.layers.4.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 388 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 389 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 390 |
+
"model.layers.4.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
|
| 391 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 392 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
|
| 393 |
+
"model.layers.40.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 394 |
+
"model.layers.40.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 395 |
+
"model.layers.40.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 396 |
+
"model.layers.40.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
|
| 397 |
+
"model.layers.40.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 398 |
+
"model.layers.40.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 399 |
+
"model.layers.40.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 400 |
+
"model.layers.40.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 401 |
+
"model.layers.40.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 402 |
+
"model.layers.40.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 403 |
+
"model.layers.40.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 404 |
+
"model.layers.41.input_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 405 |
+
"model.layers.41.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 406 |
+
"model.layers.41.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
|
| 407 |
+
"model.layers.41.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 408 |
+
"model.layers.41.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 409 |
+
"model.layers.41.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 410 |
+
"model.layers.41.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 411 |
+
"model.layers.41.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 412 |
+
"model.layers.41.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
|
| 413 |
+
"model.layers.41.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 414 |
+
"model.layers.41.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 415 |
+
"model.layers.42.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 416 |
+
"model.layers.42.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 417 |
+
"model.layers.42.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 418 |
+
"model.layers.42.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 419 |
+
"model.layers.42.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 420 |
+
"model.layers.42.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
|
| 421 |
+
"model.layers.42.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 422 |
+
"model.layers.42.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
|
| 423 |
+
"model.layers.42.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 424 |
+
"model.layers.42.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 425 |
+
"model.layers.42.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 426 |
+
"model.layers.43.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 427 |
+
"model.layers.43.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 428 |
+
"model.layers.43.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 429 |
+
"model.layers.43.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 430 |
+
"model.layers.43.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 431 |
+
"model.layers.43.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 432 |
+
"model.layers.43.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 433 |
+
"model.layers.43.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 434 |
+
"model.layers.43.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 435 |
+
"model.layers.43.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 436 |
+
"model.layers.43.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
|
| 437 |
+
"model.layers.44.input_layernorm.weight": "model-00010-of-00014.safetensors",
|
| 438 |
+
"model.layers.44.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 439 |
+
"model.layers.44.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
|
| 440 |
+
"model.layers.44.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 441 |
+
"model.layers.44.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 442 |
+
"model.layers.44.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 443 |
+
"model.layers.44.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
|
| 444 |
+
"model.layers.44.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 445 |
+
"model.layers.44.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 446 |
+
"model.layers.44.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 447 |
+
"model.layers.44.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
|
| 448 |
+
"model.layers.45.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 449 |
+
"model.layers.45.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
|
| 450 |
+
"model.layers.45.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 451 |
+
"model.layers.45.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 452 |
+
"model.layers.45.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 453 |
+
"model.layers.45.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 454 |
+
"model.layers.45.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
|
| 455 |
+
"model.layers.45.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 456 |
+
"model.layers.45.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 457 |
+
"model.layers.45.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 458 |
+
"model.layers.45.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 459 |
+
"model.layers.46.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 460 |
+
"model.layers.46.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 461 |
+
"model.layers.46.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 462 |
+
"model.layers.46.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
|
| 463 |
+
"model.layers.46.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 464 |
+
"model.layers.46.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
|
| 465 |
+
"model.layers.46.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 466 |
+
"model.layers.46.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 467 |
+
"model.layers.46.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 468 |
+
"model.layers.46.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 469 |
+
"model.layers.46.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 470 |
+
"model.layers.47.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 471 |
+
"model.layers.47.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
|
| 472 |
+
"model.layers.47.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
|
| 473 |
+
"model.layers.47.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 474 |
+
"model.layers.47.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 475 |
+
"model.layers.47.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 476 |
+
"model.layers.47.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 477 |
+
"model.layers.47.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 478 |
+
"model.layers.47.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 479 |
+
"model.layers.47.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
|
| 480 |
+
"model.layers.47.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 481 |
+
"model.layers.48.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 482 |
+
"model.layers.48.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
|
| 483 |
+
"model.layers.48.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
|
| 484 |
+
"model.layers.48.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
|
| 485 |
+
"model.layers.48.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 486 |
+
"model.layers.48.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
|
| 487 |
+
"model.layers.48.self_attn.k_proj.weight": "model-00014-of-00014.safetensors",
|
| 488 |
+
"model.layers.48.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 489 |
+
"model.layers.48.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 490 |
+
"model.layers.48.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 491 |
+
"model.layers.48.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 492 |
+
"model.layers.49.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 493 |
+
"model.layers.49.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 494 |
+
"model.layers.49.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
|
| 495 |
+
"model.layers.49.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 496 |
+
"model.layers.49.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 497 |
+
"model.layers.49.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 498 |
+
"model.layers.49.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 499 |
+
"model.layers.49.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
|
| 500 |
+
"model.layers.49.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 501 |
+
"model.layers.49.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 502 |
+
"model.layers.49.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
|
| 503 |
+
"model.layers.5.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 504 |
+
"model.layers.5.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
|
| 505 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 506 |
+
"model.layers.5.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 507 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 508 |
+
"model.layers.5.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
|
| 509 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00014-of-00014.safetensors",
|
| 510 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 511 |
+
"model.layers.5.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 512 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 513 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 514 |
+
"model.layers.50.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 515 |
+
"model.layers.50.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
|
| 516 |
+
"model.layers.50.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
|
| 517 |
+
"model.layers.50.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 518 |
+
"model.layers.50.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 519 |
+
"model.layers.50.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 520 |
+
"model.layers.50.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
|
| 521 |
+
"model.layers.50.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
|
| 522 |
+
"model.layers.50.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 523 |
+
"model.layers.50.self_attn.q_proj.weight": "model-00014-of-00014.safetensors",
|
| 524 |
+
"model.layers.50.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
|
| 525 |
+
"model.layers.51.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 526 |
+
"model.layers.51.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 527 |
+
"model.layers.51.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 528 |
+
"model.layers.51.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
|
| 529 |
+
"model.layers.51.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 530 |
+
"model.layers.51.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 531 |
+
"model.layers.51.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 532 |
+
"model.layers.51.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 533 |
+
"model.layers.51.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 534 |
+
"model.layers.51.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 535 |
+
"model.layers.51.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
|
| 536 |
+
"model.layers.52.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 537 |
+
"model.layers.52.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 538 |
+
"model.layers.52.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 539 |
+
"model.layers.52.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 540 |
+
"model.layers.52.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 541 |
+
"model.layers.52.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 542 |
+
"model.layers.52.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 543 |
+
"model.layers.52.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 544 |
+
"model.layers.52.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
|
| 545 |
+
"model.layers.52.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 546 |
+
"model.layers.52.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 547 |
+
"model.layers.53.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 548 |
+
"model.layers.53.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 549 |
+
"model.layers.53.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 550 |
+
"model.layers.53.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
|
| 551 |
+
"model.layers.53.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 552 |
+
"model.layers.53.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 553 |
+
"model.layers.53.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 554 |
+
"model.layers.53.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 555 |
+
"model.layers.53.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
|
| 556 |
+
"model.layers.53.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 557 |
+
"model.layers.53.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 558 |
+
"model.layers.54.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 559 |
+
"model.layers.54.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 560 |
+
"model.layers.54.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 561 |
+
"model.layers.54.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 562 |
+
"model.layers.54.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 563 |
+
"model.layers.54.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 564 |
+
"model.layers.54.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
|
| 565 |
+
"model.layers.54.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 566 |
+
"model.layers.54.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 567 |
+
"model.layers.54.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
|
| 568 |
+
"model.layers.54.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 569 |
+
"model.layers.55.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 570 |
+
"model.layers.55.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 571 |
+
"model.layers.55.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
|
| 572 |
+
"model.layers.55.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 573 |
+
"model.layers.55.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 574 |
+
"model.layers.55.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 575 |
+
"model.layers.55.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 576 |
+
"model.layers.55.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 577 |
+
"model.layers.55.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 578 |
+
"model.layers.55.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 579 |
+
"model.layers.55.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
|
| 580 |
+
"model.layers.56.input_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 581 |
+
"model.layers.56.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
|
| 582 |
+
"model.layers.56.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 583 |
+
"model.layers.56.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
|
| 584 |
+
"model.layers.56.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 585 |
+
"model.layers.56.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 586 |
+
"model.layers.56.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
|
| 587 |
+
"model.layers.56.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
|
| 588 |
+
"model.layers.56.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 589 |
+
"model.layers.56.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
|
| 590 |
+
"model.layers.56.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
|
| 591 |
+
"model.layers.57.input_layernorm.weight": "model-00005-of-00014.safetensors",
|
| 592 |
+
"model.layers.57.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
|
| 593 |
+
"model.layers.57.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 594 |
+
"model.layers.57.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
|
| 595 |
+
"model.layers.57.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 596 |
+
"model.layers.57.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
|
| 597 |
+
"model.layers.57.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 598 |
+
"model.layers.57.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
|
| 599 |
+
"model.layers.57.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 600 |
+
"model.layers.57.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
|
| 601 |
+
"model.layers.57.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
|
| 602 |
+
"model.layers.58.input_layernorm.weight": "model-00011-of-00014.safetensors",
|
| 603 |
+
"model.layers.58.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 604 |
+
"model.layers.58.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
|
| 605 |
+
"model.layers.58.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 606 |
+
"model.layers.58.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
|
| 607 |
+
"model.layers.58.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
|
| 608 |
+
"model.layers.58.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
|
| 609 |
+
"model.layers.58.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 610 |
+
"model.layers.58.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 611 |
+
"model.layers.58.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 612 |
+
"model.layers.58.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 613 |
+
"model.layers.59.input_layernorm.weight": "model-00013-of-00014.safetensors",
|
| 614 |
+
"model.layers.59.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
|
| 615 |
+
"model.layers.59.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
|
| 616 |
+
"model.layers.59.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
|
| 617 |
+
"model.layers.59.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 618 |
+
"model.layers.59.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
|
| 619 |
+
"model.layers.59.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
|
| 620 |
+
"model.layers.59.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 621 |
+
"model.layers.59.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
|
| 622 |
+
"model.layers.59.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
|
| 623 |
+
"model.layers.59.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 624 |
+
"model.layers.6.input_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 625 |
+
"model.layers.6.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 626 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 627 |
+
"model.layers.6.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
|
| 628 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 629 |
+
"model.layers.6.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
|
| 630 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 631 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
|
| 632 |
+
"model.layers.6.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
|
| 633 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
|
| 634 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
|
| 635 |
+
"model.layers.60.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 636 |
+
"model.layers.60.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 637 |
+
"model.layers.60.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 638 |
+
"model.layers.60.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 639 |
+
"model.layers.60.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
|
| 640 |
+
"model.layers.60.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
|
| 641 |
+
"model.layers.60.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
|
| 642 |
+
"model.layers.60.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
|
| 643 |
+
"model.layers.60.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
|
| 644 |
+
"model.layers.60.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
|
| 645 |
+
"model.layers.60.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
|
| 646 |
+
"model.layers.61.input_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 647 |
+
"model.layers.61.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
|
| 648 |
+
"model.layers.61.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
|
| 649 |
+
"model.layers.61.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 650 |
+
"model.layers.61.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 651 |
+
"model.layers.61.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
|
| 652 |
+
"model.layers.61.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
|
| 653 |
+
"model.layers.61.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
|
| 654 |
+
"model.layers.61.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
|
| 655 |
+
"model.layers.61.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 656 |
+
"model.layers.61.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
|
| 657 |
+
"model.layers.62.input_layernorm.weight": "model-00006-of-00014.safetensors",
|
| 658 |
+
"model.layers.62.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
|
| 659 |
+
"model.layers.62.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
|
| 660 |
+
"model.layers.62.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
|
| 661 |
+
"model.layers.62.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
|
| 662 |
+
"model.layers.62.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 663 |
+
"model.layers.62.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
|
| 664 |
+
"model.layers.62.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
|
| 665 |
+
"model.layers.62.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
|
| 666 |
+
"model.layers.62.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 667 |
+
"model.layers.62.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 668 |
+
"model.layers.63.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 669 |
+
"model.layers.63.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
|
| 670 |
+
"model.layers.63.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
|
| 671 |
+
"model.layers.63.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
|
| 672 |
+
"model.layers.63.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
|
| 673 |
+
"model.layers.63.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 674 |
+
"model.layers.63.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 675 |
+
"model.layers.63.self_attn.o_proj.weight": "model-00014-of-00014.safetensors",
|
| 676 |
+
"model.layers.63.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
|
| 677 |
+
"model.layers.63.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
|
| 678 |
+
"model.layers.63.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
|
| 679 |
+
"model.layers.7.input_layernorm.weight": "model-00008-of-00014.safetensors",
|
| 680 |
+
"model.layers.7.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
|
| 681 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
|
| 682 |
+
"model.layers.7.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
|
| 683 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
|
| 684 |
+
"model.layers.7.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
|
| 685 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 686 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 687 |
+
"model.layers.7.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
|
| 688 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
|
| 689 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
|
| 690 |
+
"model.layers.8.input_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 691 |
+
"model.layers.8.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
|
| 692 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
|
| 693 |
+
"model.layers.8.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
|
| 694 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
|
| 695 |
+
"model.layers.8.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
|
| 696 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
|
| 697 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
|
| 698 |
+
"model.layers.8.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
|
| 699 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
|
| 700 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 701 |
+
"model.layers.9.input_layernorm.weight": "model-00007-of-00014.safetensors",
|
| 702 |
+
"model.layers.9.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
|
| 703 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
|
| 704 |
+
"model.layers.9.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
|
| 705 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
|
| 706 |
+
"model.layers.9.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
|
| 707 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
|
| 708 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
|
| 709 |
+
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
|
| 710 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
|
| 711 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
|
| 712 |
+
"model.norm.weight": "model-00011-of-00014.safetensors"
|
| 713 |
+
}
|
| 714 |
+
}
|
hf/special_tokens_map.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
+
],
|
| 17 |
+
"eos_token": {
|
| 18 |
+
"content": "<|im_end|>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
},
|
| 24 |
+
"pad_token": {
|
| 25 |
+
"content": "<|endoftext|>",
|
| 26 |
+
"lstrip": false,
|
| 27 |
+
"normalized": false,
|
| 28 |
+
"rstrip": false,
|
| 29 |
+
"single_word": false
|
| 30 |
+
}
|
| 31 |
+
}
|
hf/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
| 3 |
+
size 11422654
|
hf/tokenizer_config.json
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
+
"model_max_length": 131072,
|
| 235 |
+
"pad_token": "<|endoftext|>",
|
| 236 |
+
"split_special_tokens": false,
|
| 237 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 238 |
+
"unk_token": null,
|
| 239 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}"
|
| 240 |
+
}
|
hf/tokenizer_config.json.bak
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
+
"add_prefix_space": false,
|
| 4 |
+
"added_tokens_decoder": {
|
| 5 |
+
"151643": {
|
| 6 |
+
"content": "<|endoftext|>",
|
| 7 |
+
"lstrip": false,
|
| 8 |
+
"normalized": false,
|
| 9 |
+
"rstrip": false,
|
| 10 |
+
"single_word": false,
|
| 11 |
+
"special": true
|
| 12 |
+
},
|
| 13 |
+
"151644": {
|
| 14 |
+
"content": "<|im_start|>",
|
| 15 |
+
"lstrip": false,
|
| 16 |
+
"normalized": false,
|
| 17 |
+
"rstrip": false,
|
| 18 |
+
"single_word": false,
|
| 19 |
+
"special": true
|
| 20 |
+
},
|
| 21 |
+
"151645": {
|
| 22 |
+
"content": "<|im_end|>",
|
| 23 |
+
"lstrip": false,
|
| 24 |
+
"normalized": false,
|
| 25 |
+
"rstrip": false,
|
| 26 |
+
"single_word": false,
|
| 27 |
+
"special": true
|
| 28 |
+
},
|
| 29 |
+
"151646": {
|
| 30 |
+
"content": "<|object_ref_start|>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false,
|
| 35 |
+
"special": true
|
| 36 |
+
},
|
| 37 |
+
"151647": {
|
| 38 |
+
"content": "<|object_ref_end|>",
|
| 39 |
+
"lstrip": false,
|
| 40 |
+
"normalized": false,
|
| 41 |
+
"rstrip": false,
|
| 42 |
+
"single_word": false,
|
| 43 |
+
"special": true
|
| 44 |
+
},
|
| 45 |
+
"151648": {
|
| 46 |
+
"content": "<|box_start|>",
|
| 47 |
+
"lstrip": false,
|
| 48 |
+
"normalized": false,
|
| 49 |
+
"rstrip": false,
|
| 50 |
+
"single_word": false,
|
| 51 |
+
"special": true
|
| 52 |
+
},
|
| 53 |
+
"151649": {
|
| 54 |
+
"content": "<|box_end|>",
|
| 55 |
+
"lstrip": false,
|
| 56 |
+
"normalized": false,
|
| 57 |
+
"rstrip": false,
|
| 58 |
+
"single_word": false,
|
| 59 |
+
"special": true
|
| 60 |
+
},
|
| 61 |
+
"151650": {
|
| 62 |
+
"content": "<|quad_start|>",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false,
|
| 67 |
+
"special": true
|
| 68 |
+
},
|
| 69 |
+
"151651": {
|
| 70 |
+
"content": "<|quad_end|>",
|
| 71 |
+
"lstrip": false,
|
| 72 |
+
"normalized": false,
|
| 73 |
+
"rstrip": false,
|
| 74 |
+
"single_word": false,
|
| 75 |
+
"special": true
|
| 76 |
+
},
|
| 77 |
+
"151652": {
|
| 78 |
+
"content": "<|vision_start|>",
|
| 79 |
+
"lstrip": false,
|
| 80 |
+
"normalized": false,
|
| 81 |
+
"rstrip": false,
|
| 82 |
+
"single_word": false,
|
| 83 |
+
"special": true
|
| 84 |
+
},
|
| 85 |
+
"151653": {
|
| 86 |
+
"content": "<|vision_end|>",
|
| 87 |
+
"lstrip": false,
|
| 88 |
+
"normalized": false,
|
| 89 |
+
"rstrip": false,
|
| 90 |
+
"single_word": false,
|
| 91 |
+
"special": true
|
| 92 |
+
},
|
| 93 |
+
"151654": {
|
| 94 |
+
"content": "<|vision_pad|>",
|
| 95 |
+
"lstrip": false,
|
| 96 |
+
"normalized": false,
|
| 97 |
+
"rstrip": false,
|
| 98 |
+
"single_word": false,
|
| 99 |
+
"special": true
|
| 100 |
+
},
|
| 101 |
+
"151655": {
|
| 102 |
+
"content": "<|image_pad|>",
|
| 103 |
+
"lstrip": false,
|
| 104 |
+
"normalized": false,
|
| 105 |
+
"rstrip": false,
|
| 106 |
+
"single_word": false,
|
| 107 |
+
"special": true
|
| 108 |
+
},
|
| 109 |
+
"151656": {
|
| 110 |
+
"content": "<|video_pad|>",
|
| 111 |
+
"lstrip": false,
|
| 112 |
+
"normalized": false,
|
| 113 |
+
"rstrip": false,
|
| 114 |
+
"single_word": false,
|
| 115 |
+
"special": true
|
| 116 |
+
},
|
| 117 |
+
"151657": {
|
| 118 |
+
"content": "<tool_call>",
|
| 119 |
+
"lstrip": false,
|
| 120 |
+
"normalized": false,
|
| 121 |
+
"rstrip": false,
|
| 122 |
+
"single_word": false,
|
| 123 |
+
"special": false
|
| 124 |
+
},
|
| 125 |
+
"151658": {
|
| 126 |
+
"content": "</tool_call>",
|
| 127 |
+
"lstrip": false,
|
| 128 |
+
"normalized": false,
|
| 129 |
+
"rstrip": false,
|
| 130 |
+
"single_word": false,
|
| 131 |
+
"special": false
|
| 132 |
+
},
|
| 133 |
+
"151659": {
|
| 134 |
+
"content": "<|fim_prefix|>",
|
| 135 |
+
"lstrip": false,
|
| 136 |
+
"normalized": false,
|
| 137 |
+
"rstrip": false,
|
| 138 |
+
"single_word": false,
|
| 139 |
+
"special": false
|
| 140 |
+
},
|
| 141 |
+
"151660": {
|
| 142 |
+
"content": "<|fim_middle|>",
|
| 143 |
+
"lstrip": false,
|
| 144 |
+
"normalized": false,
|
| 145 |
+
"rstrip": false,
|
| 146 |
+
"single_word": false,
|
| 147 |
+
"special": false
|
| 148 |
+
},
|
| 149 |
+
"151661": {
|
| 150 |
+
"content": "<|fim_suffix|>",
|
| 151 |
+
"lstrip": false,
|
| 152 |
+
"normalized": false,
|
| 153 |
+
"rstrip": false,
|
| 154 |
+
"single_word": false,
|
| 155 |
+
"special": false
|
| 156 |
+
},
|
| 157 |
+
"151662": {
|
| 158 |
+
"content": "<|fim_pad|>",
|
| 159 |
+
"lstrip": false,
|
| 160 |
+
"normalized": false,
|
| 161 |
+
"rstrip": false,
|
| 162 |
+
"single_word": false,
|
| 163 |
+
"special": false
|
| 164 |
+
},
|
| 165 |
+
"151663": {
|
| 166 |
+
"content": "<|repo_name|>",
|
| 167 |
+
"lstrip": false,
|
| 168 |
+
"normalized": false,
|
| 169 |
+
"rstrip": false,
|
| 170 |
+
"single_word": false,
|
| 171 |
+
"special": false
|
| 172 |
+
},
|
| 173 |
+
"151664": {
|
| 174 |
+
"content": "<|file_sep|>",
|
| 175 |
+
"lstrip": false,
|
| 176 |
+
"normalized": false,
|
| 177 |
+
"rstrip": false,
|
| 178 |
+
"single_word": false,
|
| 179 |
+
"special": false
|
| 180 |
+
},
|
| 181 |
+
"151665": {
|
| 182 |
+
"content": "<tool_response>",
|
| 183 |
+
"lstrip": false,
|
| 184 |
+
"normalized": false,
|
| 185 |
+
"rstrip": false,
|
| 186 |
+
"single_word": false,
|
| 187 |
+
"special": false
|
| 188 |
+
},
|
| 189 |
+
"151666": {
|
| 190 |
+
"content": "</tool_response>",
|
| 191 |
+
"lstrip": false,
|
| 192 |
+
"normalized": false,
|
| 193 |
+
"rstrip": false,
|
| 194 |
+
"single_word": false,
|
| 195 |
+
"special": false
|
| 196 |
+
},
|
| 197 |
+
"151667": {
|
| 198 |
+
"content": "<think>",
|
| 199 |
+
"lstrip": false,
|
| 200 |
+
"normalized": false,
|
| 201 |
+
"rstrip": false,
|
| 202 |
+
"single_word": false,
|
| 203 |
+
"special": false
|
| 204 |
+
},
|
| 205 |
+
"151668": {
|
| 206 |
+
"content": "</think>",
|
| 207 |
+
"lstrip": false,
|
| 208 |
+
"normalized": false,
|
| 209 |
+
"rstrip": false,
|
| 210 |
+
"single_word": false,
|
| 211 |
+
"special": false
|
| 212 |
+
}
|
| 213 |
+
},
|
| 214 |
+
"additional_special_tokens": [
|
| 215 |
+
"<|im_start|>",
|
| 216 |
+
"<|im_end|>",
|
| 217 |
+
"<|object_ref_start|>",
|
| 218 |
+
"<|object_ref_end|>",
|
| 219 |
+
"<|box_start|>",
|
| 220 |
+
"<|box_end|>",
|
| 221 |
+
"<|quad_start|>",
|
| 222 |
+
"<|quad_end|>",
|
| 223 |
+
"<|vision_start|>",
|
| 224 |
+
"<|vision_end|>",
|
| 225 |
+
"<|vision_pad|>",
|
| 226 |
+
"<|image_pad|>",
|
| 227 |
+
"<|video_pad|>"
|
| 228 |
+
],
|
| 229 |
+
"bos_token": null,
|
| 230 |
+
"clean_up_tokenization_spaces": false,
|
| 231 |
+
"eos_token": "<|im_end|>",
|
| 232 |
+
"errors": "replace",
|
| 233 |
+
"extra_special_tokens": {},
|
| 234 |
+
"model_max_length": 131072,
|
| 235 |
+
"pad_token": "<|endoftext|>",
|
| 236 |
+
"split_special_tokens": false,
|
| 237 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 238 |
+
"unk_token": null
|
| 239 |
+
}
|
hf/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
hf_ip/lb_endpoint.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
10.45.17.59:8000
|
hf_ip/load_balancer.log
ADDED
|
@@ -0,0 +1,1198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 2 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 3 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 4 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 5 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 6 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 7 |
+
10.45.190.244 - "GET /v1/models HTTP/1.1" 200 -
|
| 8 |
+
10.45.190.245 - "GET /v1/models HTTP/1.1" 200 -
|
| 9 |
+
10.46.50.247 - "GET /v1/models HTTP/1.1" 200 -
|
| 10 |
+
10.46.17.244 - "GET /v1/models HTTP/1.1" 200 -
|
| 11 |
+
10.46.50.251 - "GET /v1/models HTTP/1.1" 200 -
|
| 12 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 13 |
+
1010.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 14 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 15 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 16 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 17 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 18 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 19 |
+
110.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 20 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1"10.45.110.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 21 |
+
10.46.10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 22 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 23 |
+
10.45.190.244 - "POST /v1/completions HTTP/10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 24 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 25 |
+
10.45.190.10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 26 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 27 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 28 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 29 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 30 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 31 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 32 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1"10.46.510.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 33 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 34 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 35 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1"10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 36 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 37 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 38 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 39 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 40 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 41 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 42 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 43 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 44 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 45 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 46 |
+
10.46.50.10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 47 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 48 |
+
10.45.190.242 - "POST /v1/completions HTTP/10.46.17.2310.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 49 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 50 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 51 |
+
10.46.17.236 - "POST /v1/completions HTTP10.46.17.23610.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 52 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 53 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 54 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 55 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 56 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 57 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 58 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 59 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 60 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 61 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 62 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 63 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 64 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 20010.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 65 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 66 |
+
10.410.46.50.247 - "POST /v1/completions HTTP/1.1" 2010.410.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 67 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 68 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 69 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 70 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 71 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 72 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 73 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 74 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 75 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 20010.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 76 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 77 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 78 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 79 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 80 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 81 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 82 |
+
1010.45.190.245 - "POST /v1/completions HTTP/1.1" 20010.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 83 |
+
10.10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 84 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 85 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 86 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 87 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 88 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 89 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 90 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 91 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 92 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 93 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 94 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 95 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1"10.46.110.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 96 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 97 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 98 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 10.46.10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 99 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 100 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 101 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 102 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 103 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 104 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 105 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 106 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 107 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 108 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 109 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 110 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 111 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 112 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 113 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 114 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 115 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 116 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 117 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 118 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 119 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 120 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 121 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 122 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 123 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 124 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 125 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 126 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 127 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 128 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 129 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 130 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 131 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 132 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 133 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 134 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 135 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 136 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 137 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 138 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 139 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 140 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 141 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 142 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 143 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 144 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 145 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 146 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 147 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 148 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 149 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 150 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 151 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 152 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 153 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 154 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 155 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 156 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 157 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 2010.410.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 158 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 159 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 160 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 161 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 162 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 163 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 164 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 165 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 166 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 167 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 168 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 169 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 170 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 171 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 172 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 173 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 174 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 175 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 176 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 177 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 178 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 179 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 180 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 181 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 182 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 183 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 184 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 185 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 20010.10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 186 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 187 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 188 |
+
110.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 189 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 190 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 191 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 192 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -110.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 193 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 194 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 195 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 196 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 197 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 198 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 199 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 200 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 201 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 202 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 203 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 204 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 205 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 206 |
+
1010.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 207 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 208 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 209 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 210 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 211 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 212 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 213 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 214 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 215 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 216 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 217 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 218 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 219 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 220 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 221 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 222 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 223 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 224 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 225 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 226 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 227 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 228 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 229 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 230 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 231 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 232 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 233 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 234 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 235 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 236 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 237 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 238 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 239 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 240 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 241 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 242 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 243 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 244 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 245 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 246 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 247 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 248 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 249 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 250 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 251 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 252 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 253 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 254 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 255 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 256 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 257 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 258 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 259 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 260 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 261 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.4610.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 262 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 263 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 264 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 265 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 266 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 267 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 268 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 269 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 270 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 271 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 1010.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 272 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 273 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 274 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 275 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 276 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 277 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 278 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 279 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 280 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 281 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 282 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 283 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 284 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 285 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 286 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 287 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 288 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 289 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 290 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 291 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 1010.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 292 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 293 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 294 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 295 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 296 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 297 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 298 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 299 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 300 |
+
1010.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 301 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 302 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 303 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 304 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 20010.10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 305 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 306 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 307 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 308 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 309 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 310 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 311 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 312 |
+
110.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 313 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 314 |
+
10.10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 315 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 316 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 210.4610.46.17.240 - "POST /v1/completions HTTP/1.1" 210.4610.45.190.245 - "POST /v1/completions HTTP/1.1" 10.46.10.46.50.247 - "POST /v1/completions HTTP/1.1" 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 317 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 318 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 319 |
+
10.4510.45.190.244 - "POST /v1/completions HTTP/1.1" 210.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 320 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 321 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 322 |
+
10.4610.46.50.247 - "POST /v1/completions HTTP/1.1" 210.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 323 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 324 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 325 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 326 |
+
10.410.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 327 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 328 |
+
10.410.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 329 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 330 |
+
10.4510.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 331 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 332 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 333 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 334 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 335 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 336 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 337 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 338 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 339 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 340 |
+
10.410.46.17.236 - "POST /v1/completions HTTP/1.1" 2010.410.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 341 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 342 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 343 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 344 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 345 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 346 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 347 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 348 |
+
10.410.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 349 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 350 |
+
10.10.46.50.251 - "POST /v1/completions HTTP/1.1" 20010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 351 |
+
10.10.46.50.203 - "POST /v1/completions HTTP/1.1" 20010.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 352 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 353 |
+
10.10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 354 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 355 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 356 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 357 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 358 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 359 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 360 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 361 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 362 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 363 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 364 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 365 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 366 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 367 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 368 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 369 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -110.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 370 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 371 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 372 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 373 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 20010.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 374 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 375 |
+
1010.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 376 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 377 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 378 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 379 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 380 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 381 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 382 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 383 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 384 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 385 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 386 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 387 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 388 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 389 |
+
1010.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 390 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 391 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 392 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 393 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 394 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 395 |
+
10.10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 396 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 397 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 398 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 399 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 400 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 401 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 402 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 403 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 404 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 405 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 406 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 407 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 408 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 409 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 410 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 411 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 412 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 413 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 414 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 415 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 416 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 417 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 418 |
+
110.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 419 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 420 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 421 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 422 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 423 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 424 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 425 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 426 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 427 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 428 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 429 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 430 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 431 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 432 |
+
110.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 433 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 434 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 435 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 436 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 437 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 438 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 439 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 440 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 441 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 442 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 443 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 444 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 445 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 446 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 447 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 448 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 449 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 20010.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 450 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 451 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 452 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 453 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 454 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 455 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 456 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 457 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 458 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 459 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 460 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 461 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 462 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 463 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 464 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 465 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 466 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 467 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 468 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 469 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 470 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 471 |
+
10.10.45.190.244 - "POST /v1/completions HTTP/1.1" 20010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 472 |
+
10.10.46.17.2410.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 473 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 474 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 475 |
+
10.46.50.251 - "POST /v1/completions HTTP10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 476 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 477 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 478 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 479 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 480 |
+
10.46.17.236 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 481 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 482 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 483 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 484 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 485 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 486 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 487 |
+
10.46.17.236 - "POST /v1/completions HTTP/10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 488 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 489 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 490 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 491 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 492 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 493 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 494 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 495 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 496 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 497 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 498 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 499 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 500 |
+
10.45.190.2410.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 501 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 502 |
+
10.46.17.236 - "POST /v1/completions HTTP/10.46.17.2310.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 503 |
+
10.46.17.244 - "POST /v1/completions HTTP10.46.50.25110.46.17.236 - "POST /v1/completions HTTP10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 504 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 505 |
+
10.46.50.203 10.46.17.240 - "POST /v1/completions HTT10.45.190.24510.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 506 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 507 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 508 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 509 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 510 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 511 |
+
10.45.190.244 - "POST /v1/completions 10.45.190.245 - 10.46.17.244 - "POST /v1/completions H10.46.50.251 - 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 512 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 513 |
+
10.46.17.240 - "POST /v1/completions H10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 514 |
+
10.46.17.236 -10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 515 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 516 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 517 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 518 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 519 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 520 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 521 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 522 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 523 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 524 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 525 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 526 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 527 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 528 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 529 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 530 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 531 |
+
10.45.190.244 - "POST /v1/completi10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 532 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 533 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 534 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 535 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 536 |
+
10.45.190.242 - "PO10.46.50.247 - "POST /v1/completion10.45.190.245 - "P10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 537 |
+
10.46.50.203 - "POST /v1/completions HTTP/110.46.50.210.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 538 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 539 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 540 |
+
10.45.190.244 - "POST /v1/completions HTTP/10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 541 |
+
10.46.17.23610.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 542 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 543 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 544 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 545 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 546 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 547 |
+
10.46.17.236 - "POST /v1/completions 10.10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 548 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 549 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 550 |
+
10.10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 551 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 552 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 553 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 554 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 555 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 556 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 557 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 558 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 559 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 560 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 561 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 562 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 563 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 564 |
+
10.46.50.203 - "10.46.17.236 - "POST /v1/completions 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 565 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 566 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 567 |
+
10.45.190.245 -10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 568 |
+
10.46.50.251 - "POST /v1/completions HT10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 569 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 570 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 571 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 572 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 573 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 574 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 575 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 576 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 577 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 578 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 579 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 580 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 581 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 582 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 583 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 584 |
+
10.410.46.17.236 - "POST /v1/completions HTTP/1.1" 2010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 585 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 586 |
+
10.10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 587 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 588 |
+
10.45.190.210.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 589 |
+
10.45.190.242 - "POST /v1/completions HTTP/110.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 590 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 591 |
+
10.45.190.210.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 592 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 593 |
+
10.46.17.244 - "POST /v1/completions HT10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 594 |
+
10.45.190.245 10.46.17.240 - "POST /v1/completions HTT10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 595 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 596 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 597 |
+
10.45.190.24210.46.50.251 - "POST /v1/completions HTTP/1.10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 598 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 599 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 600 |
+
10.46.17.2410.46.50.203 - "POST /v1/completions HTTP/10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 601 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 602 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 603 |
+
10.45.190.210.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 604 |
+
10.46.17.236 - "POST /v1/completions HTTP/10.46.17.2410.45.190.244 - "POST /v1/completions HTTP10.410.46.1710.45.190.242 - "POST /v1/completions HTT10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 605 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 606 |
+
10.410.46.50.251 - "POST /v1/completions HTTP/1.1" 2010.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 607 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 608 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 609 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 610 |
+
10.4610.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 611 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 612 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 210.4610.45.190.242 - "POST /v1/completions HTTP/1.1" 10.45.10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 613 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 614 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 615 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 616 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 617 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 618 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 619 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 620 |
+
10.46.10.45.190.244 - "POST /v1/completions HTTP/1.1"10.46.510.46.17.236 - "POST /v1/completions HTTP/1.1"10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 621 |
+
10.46.110.46.17.236 - "POST /v1/completions HTTP/1.1"10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 622 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 623 |
+
10.4610.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 624 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 625 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1"10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 626 |
+
10.46.10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 627 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 628 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 629 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 630 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 631 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 632 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 633 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 634 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 635 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 636 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 637 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 638 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 639 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 640 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 641 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 642 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 643 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 644 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 645 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 646 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 647 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 648 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 649 |
+
10.46.50.203 - "POST /v1/completions HTTP/110.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 650 |
+
10.46.17.2410.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 651 |
+
10.46.17.244 - "POST /v1/completions HTTP10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 652 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 653 |
+
10.46.17.2410.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 654 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 655 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 656 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 657 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 658 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 659 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 660 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 661 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 662 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 663 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.10.46.17.10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 664 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 665 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 666 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 667 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 668 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 669 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 670 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 671 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 672 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 673 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 674 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 675 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 676 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 677 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 678 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 679 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 680 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 681 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 682 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 683 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 684 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 685 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 686 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 687 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 688 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 689 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 690 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 691 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 692 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 693 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 694 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 695 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 696 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 697 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 698 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 699 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 700 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 701 |
+
10.46.17.240 - "POST /v1/completions HTTP/10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 702 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 703 |
+
10.46.50.2510.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 704 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 705 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 706 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 707 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 708 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 709 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 710 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 711 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 712 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 713 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 714 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 715 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 716 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 717 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 718 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 719 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 720 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 721 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 722 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 723 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 724 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 725 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 726 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 727 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 728 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 729 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 730 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 731 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 732 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 733 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 734 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 735 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 736 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 737 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 738 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 739 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 740 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 741 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 742 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 743 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 744 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 745 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 746 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 747 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 748 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 749 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 750 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 751 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 752 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 753 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 754 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 755 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 756 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 757 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 758 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 759 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 760 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 761 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 762 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 763 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 764 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 765 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 766 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 767 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 768 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 769 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 770 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 771 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 772 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 773 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 774 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 775 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 776 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 777 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 778 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 779 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 780 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 781 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 782 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 783 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 784 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 785 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 786 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 787 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 788 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 789 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 790 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 791 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 792 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 793 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 794 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 795 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 796 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 797 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 798 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 799 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 800 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 801 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 802 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 803 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 804 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 805 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 806 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 807 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 808 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 809 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 810 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 811 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 812 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 813 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 814 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 815 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 816 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 817 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 818 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 819 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 820 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 821 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 822 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 823 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 824 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 825 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 826 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 827 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 828 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 829 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 830 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 831 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 832 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 833 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 834 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 835 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 836 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 837 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 838 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 839 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 840 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 841 |
+
110.45.190.242 - "POST /v1/completions HTTP/1.1" 200 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 842 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 843 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 844 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 845 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 846 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 847 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 848 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 849 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 850 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 851 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 852 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 853 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 854 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 855 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 856 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 857 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 858 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 859 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 860 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 861 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 862 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 863 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 864 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 865 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 866 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 867 |
+
10.4610.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 868 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 210.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 869 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 870 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 871 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 872 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 873 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 874 |
+
10.10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 875 |
+
1010.46.17.240 - "POST /v1/completions HTTP/1.1" 200 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 876 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 877 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 878 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 879 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 880 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 881 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 882 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 883 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 884 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 885 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 886 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 887 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 888 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 889 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 890 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 891 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 892 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 893 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 894 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 895 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 896 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 897 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 898 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 899 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 900 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 901 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 902 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 903 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 904 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 905 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 906 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 907 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 908 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 909 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 910 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 911 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 912 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 913 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 914 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 915 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 916 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 917 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 918 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 919 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 920 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 921 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 922 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 923 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 924 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 925 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 926 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 927 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 928 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 929 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 930 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 931 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 932 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 933 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 934 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 935 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 936 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 937 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 938 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 939 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 940 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 941 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 942 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 943 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 944 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 945 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 946 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 947 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 948 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 949 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 950 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 951 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 952 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 953 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 954 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 955 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 956 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 957 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 958 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 959 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 960 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 961 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 962 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 963 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 964 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 965 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 966 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 967 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 968 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 969 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 970 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 971 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 972 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 973 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 974 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 975 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 976 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 977 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 978 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 979 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 980 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 981 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 982 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 983 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 984 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 985 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 986 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 987 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 988 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 989 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 990 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 991 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 992 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 993 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 994 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 995 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 996 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 997 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 998 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 999 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1000 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1001 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1002 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1003 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1004 |
+
10.46.17.210.410.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1005 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1006 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1007 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1008 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1009 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1010 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1011 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1012 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1013 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1014 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1015 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1016 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1"10.46.510.46.50.247 - "POST /v1/completions HTTP/1.1"10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1017 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1018 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1019 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1020 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1021 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1022 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1023 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1024 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1025 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1026 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1027 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1028 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1029 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1030 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1031 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1032 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1033 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1034 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1035 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1036 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1037 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1038 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1039 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1040 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1041 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1042 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1043 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1044 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1045 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1046 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1047 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1048 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1049 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1050 |
+
10.4610.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1051 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1052 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1053 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1054 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1055 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1056 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1057 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1058 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1059 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1060 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1061 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1062 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1063 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1064 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1065 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1066 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1067 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1068 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1069 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1070 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1071 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1072 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1073 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1074 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1075 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1076 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1077 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1078 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1079 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1080 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1081 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1082 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1083 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1084 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1085 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1086 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1087 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1088 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1089 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1090 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1091 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1092 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1093 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1094 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1095 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1096 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1097 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1098 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1099 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1100 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1101 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1102 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1103 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1104 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1105 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1106 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1107 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1108 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1109 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1110 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1111 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1112 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1113 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1114 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1115 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1116 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1117 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1118 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1119 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1120 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1121 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1122 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1123 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1124 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1125 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1126 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1127 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1128 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1129 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1130 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1131 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1132 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1133 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1134 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1135 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1136 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1137 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1138 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1139 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1140 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1141 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1142 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1143 |
+
1" 200 -
|
| 1144 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1145 |
+
10.10.46.17.240 - "POST /v1/completions HTTP/1.1" 20010.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1146 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1147 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1148 |
+
-
|
| 1149 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1150 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1151 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1152 |
+
-
|
| 1153 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1154 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1155 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1156 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1157 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1158 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1159 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1160 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1161 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1162 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1163 |
+
v1/completions HTTP/1.1" 200 -
|
| 1164 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1165 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1166 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1167 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1168 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1169 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1170 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1171 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1172 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1173 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1174 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1175 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1176 |
+
1/completions HTTP/1.1" 200 -
|
| 1177 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1178 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1179 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1180 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1181 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1182 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1183 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1184 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1185 |
+
03 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1186 |
+
10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1187 |
+
10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1188 |
+
|
| 1189 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1190 |
+
10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1191 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1192 |
+
10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1193 |
+
10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1194 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1195 |
+
10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1196 |
+
10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1197 |
+
10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
|
| 1198 |
+
10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
|
hf_ip/vllm_gpu0.log
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:24 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:26 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:26 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8001, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:33 [config.py:717] This model supports multiple tasks: {'generate', 'reward', 'embed', 'score', 'classify'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:33 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:13:38 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:13:41 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:13:41 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7fc89ba32170>
|
| 13 |
+
INFO 01-04 13:13:42 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:13:42 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:13:42 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:13:42 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 117.89 seconds
|
| 35 |
+
INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 118.156821 seconds
|
| 36 |
+
INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 37 |
+
INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.60 s
|
| 38 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
|
| 39 |
+
INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.42 s
|
| 40 |
+
INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 68.02 s in total
|
| 41 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 42 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
|
| 43 |
+
INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
|
| 44 |
+
INFO 01-04 13:18:32 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.40 seconds
|
| 45 |
+
INFO 01-04 13:18:32 [core_client.py:439] Core engine process 0 ready.
|
| 46 |
+
WARNING 01-04 13:18:32 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 47 |
+
INFO 01-04 13:18:32 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 48 |
+
INFO 01-04 13:18:32 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 49 |
+
INFO 01-04 13:18:32 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8001
|
| 50 |
+
INFO 01-04 13:18:32 [launcher.py:28] Available routes are:
|
| 51 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
|
| 52 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /docs, Methods: HEAD, GET
|
| 53 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
|
| 54 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
|
| 55 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /health, Methods: GET
|
| 56 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /load, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /ping, Methods: GET, POST
|
| 58 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 61 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /version, Methods: GET
|
| 62 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /pooling, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /score, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /rerank, Methods: POST
|
| 70 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 71 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 72 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /invocations, Methods: POST
|
| 73 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /metrics, Methods: GET
|
| 74 |
+
INFO: Started server process [1315105]
|
| 75 |
+
INFO: Waiting for application startup.
|
| 76 |
+
INFO: Application startup comINFO 01INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:00:03 [loggers.py:111] Engine 000: Avg prompt throughput: 106.6 tokens/s, Avg generation throughput: 70.6 tokens/s, Running: 1 reqINFO 01-INFO 01-04 14:00:07 [loggers.py:111] Engine 000: Avg prINFO 01-04 INFO 01-04 14:00:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs,INFO 01-04 14:00:15 [loggers.py:111] Engine 000: Avg prompt tINFINFO 01-04 14:00:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04INFO 01-04 14:00:23 [loggers.py:111] Engine 000: Avg prompt throughput: 77.3 toINFO 01-04 14:00:25 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:00:26 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 INFO 01-04 14:00:33 [loggers.py:111] Engine 000: Avg prompt throughput: 64.9 tokINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 77 |
+
INFO 01-04 14:00:35 [loggers.py:111] Engine 000: Avg promptINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 78 |
+
INFO 01-0INFO 01-04 14:00:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO 01-04 14:00:45 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:00:46 [loggers.py:111] Engine 000: Avg prompt througINFO 01-INFO 01-04 14:00:53 [loggers.py:111] Engine 000: Avg prompt throughput: 72.0 tokens/s, Avg generation throughput: 74.1 tokens/s, Running: 2 reINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 79 |
+
ININFO 01INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1INFO 01-04 INFO 01-04 14:01:03 [loggers.py:111] Engine 000: Avg prompt throughput: 70.9 tokens/s, Avg generation throughput: 72.9 tokens/s, Running: 2 reqINFO 01-04 14:01:06 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 80 |
+
INFO 01-04 14INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 81 |
+
INFO 01-04 14:01:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0INFOINFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14INFO 01-04 14:01:23 [loggers.py:111] Engine 000: Avg prompt throughput: 81.3 tokens/s, INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO 01-04 14:01:27 [loggers.py:111] Engine 000: Avg prompt throughput: 81.7 INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:01:33 [loggers.py:111] Engine 000: Avg prompt throughput: 67INFO 0INFO 01-04 14:01:37 [loggers.py:111] Engine 000: Avg prompt throughput: 110.1 tokens/s, Avg generation throughput: 106.4 tokensINFO 01-04 INFO 01-04 14:01:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 rINFO 01INFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 111.8 toINFO: 10.46INFO 01-04 14:01:53 [loggers.py:111] Engine 000: Avg prompt throughput: 72.8 tokeINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 20INFO INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14INFO 01-04 14:02:03 [loggers.py:111] Engine 000: Avg prompt throughput: 123.4 tokens/s, Avg generation throughput: 62.8 tokens/s, Running: 2 reqs, INFOINFO 01-04 14:02:05 [loggers.py:111] Engine 000: Avg promINFINFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:0INFO 01-04 14:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 55.0 tokens/s, Avg generation throughput: 87.8 tokens/s, Running: 2 reININFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:INFO 01-04 14:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 80.3 tokens/sINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1"INFO: INFO 01-04 14:02:25 [loggers.py:111] Engine 000: Avg pINFO 01-04 1INFO: 10.45.190.192:0 - "POST /v1/completions HINFO 01-04 14:02INFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:02:35 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:02INFO 01-04 14:02:43 [loggers.py:111] Engine 000: Avg prompt throughput: 143.7 tokens/s, Avg generation throughput: 77.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache IINFO 01-04 14:0INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 82 |
+
INFO 01-04 14:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 87.8 tokens/s, Avg generation throughput: 97.0 tokens/s, Running: 2 INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 83 |
+
INFO 01-04 14:INFO 01-04 14:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 110.9 tokens/s, Avg generation throughput: 121.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 84 |
+
INFO:INFO 01-04 14:03:07 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:03:09 [INFO 01-04 14:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 51.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix caINFO: 10.45.190INFO 01-04 14:03:23 [loggers.py:111] Engine 000: Avg prompt throughput: 241.7 tokens/s, Avg generation throughput: 87.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hitINFO 01-04 1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 86 |
+
INFO 01-04 14:03:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 87 |
+
INFO 01-04 14:03:37 [loggers.py:111] Engine 000: Avg prompt throughput: 126.7 tokens/s, Avg generation throughput: 82.6 tokINFO 01-04 14:03:39 INFO 01-04 14:03:43 [loggers.py:111] Engine 000: Avg prompt throughput: 139INFO 01-04 14:03:47 [loggers.py:111] Engine 000: Avg prompt throughput: 123.8 tokens/s, Avg generation throughput: 99.1 tokens/s, Running: 3 reqINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 88 |
+
INFO 01-04 14:03:53 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 89 |
+
INFO 01-04 14:03:57 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 1INFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg prompt throughput: 79.5 tokens/s, Avg generation throughput: 62.8 tokens/s, Running:INFO 01-04 14:04:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.4 tokens/s, Running: 2 reqs, WaINFO 01-04 14:04:13 [loggers.py:111] Engine 000: Avg prompt throughput: 134.8 tokens/s, Avg generation throughput: 77.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cachINFO: 10.46.17.192:0 - "POST /v1/completINFO 01-04 1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 90 |
+
INFO 01-04 14:04:19 [loggers.INFO 01-04 14:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 18.0 tokens/s, RunniINFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:04:29 [loggersINFO 01-04 14:04:33 [loggers.py:111] Engine 000: Avg prompt throughput: 193.7 tokens/s, Avg generation throughput: 40.1 tokens/s, RunnINFO 01-04 14:04:37 [loggers.py:111] Engine 000: Avg prompt throughput: 141.9 tokens/s, Avg generation throughput: 85.0 tokINFO 01-04 14:04:39 [loggINFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 91 |
+
INFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg promINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 92 |
+
INFO 01-04 14:04:49 [loggeINFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throughput: 188.0 tokens/s, Avg generation throughput: 51.7 tokens/s, RuINFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg prompt throughput: 202.9 tokens/sINFO 01-04 14:05:03 [loggers.py:111] Engine 000: Avg prompt throughput: 118.1 tokens/s, Avg generation throughput: 72.5 tokens/s, RuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 93 |
+
INFO: 10.46.17.1INFO: 10.46.17.192:0 - "POST /v1/completioINFO 01-04 14:05:07 INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO 01-04 14:05:13 [loggers.py:111] Engine 000: Avg prompt throughput: 208.7 tokens/s, Avg generation throughput: 77.8 tokens/s, RunnINFO 01-04 14:05:17 [loggers.py:111] Engine 000: Avg prompt throughput: 178.8 tokensINFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompt throughput: 155.6 tokens/s, Avg generation throughput: 74.9 tokens/s, RunnINFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 175.9 tokensINFO 01-04 14:05:33 [loggers.py:111] Engine 000: Avg prompt throughput: 164.6 tokens/s, Avg generation throughput: 105.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 6.4%
|
| 95 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 96 |
+
INFO: 10.45.190.192:0 - "POST /v1/completioINFO 01-04 14:05:37 INFO 01-04 14:05:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 76.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cINFO 01-04 14:05:47 [loggers.py:111] Engine 000: Avg prompt throughput: 165.1 tokensINFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt throughput: 332.1 tokens/s, Avg generation throughput: 80.2 tokens/s, RunnINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 97 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 98 |
+
INFO 01-04 14:05:5INFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 195.5 tokens/s, Avg generation throughput: 69.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cacINFO 01-04 14:06:0INFO 01-04 14:06:13 [loggers.py:111] Engine 000: Avg prompt throughput: 132.6 tokens/s, Avg generation throughput: 120.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix caINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 99 |
+
INFO 01-04 14:06:1INFO 01-04 14:06:23 [loggers.py:111] Engine 000: Avg prompt throughput: 279.3 tokens/s, Avg generation throughput: 123.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 5.0%
|
| 100 |
+
INFO: 10.46.50.192:0 - "POST /v1/completiINFO 01-04 14:06:27 [INFO 01-04 14:06:33 [loggers.py:111] Engine 000: Avg prompt throughput: 266.1 tokens/s, Avg generation throughput: 122.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 4.7%
|
| 101 |
+
INFO: 10.46.17.192:0 - "POST /v1/completiINFO 01-04 14:06:37 [INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 102 |
+
INFO 01-04 14:06:43 [loggers.py:111] Engine 000: Avg prompt throughput: 248.5 tokens/s, Avg generation throughput: 102.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, PrefixINFO: 10.45.190.19INFO 01-04 14:06:53 [loggers.py:111] Engine 000: Avg prompt throughput: 365.8 tokens/s, Avg generation throughput: 112.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 7.4%
|
| 103 |
+
INFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 177.2 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 7.4%
|
| 104 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 106 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 107 |
+
INFO 01-04 14:07:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 88.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cacINFO: 10.43.30INFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput: 507.3 tokens/s, Avg generation throughput: 87.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 9.2%
|
| 108 |
+
INFO: 10.46.50.192:0 - "POST /v1/completINFO 01-04 14:07:27 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:07INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 109 |
+
INFO 01-04 14:07:33 [loggers.py:111] Engine 000: Avg prompt throughput: 154.9 tokens/s, Avg generation throughpINFO 01-04 14:07:36 [INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 110 |
+
INFO INFO 01-04 14:07:3INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 111 |
+
INFO 01-04 14:07:43 [loggers.py:111] Engine 000: Avg prompt throughput: 283.0 tokens/s, Avg generation throughINFO: 10.43.30INFO 0INFO 01-04 14:07:47 [loggers.py:111] Engine 000: Avg prompt throughput: 345.4 tokens/s, Avg generation throughput: 202.0 tokens/INFO 01-04 14:07:49 INFO 01-04 14:07:53 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:07:57 [loggers.py:111] Engine 000: Avg prompt throughput: 199.4 tokens/s, Avg generation throughput: 105.6 tokens/s, RunINFO: 10.46.50.1INFO 01-04 14:08:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, RunnINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg prompt throughput: 166.5 tokensINFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 218.1 tokens/s, Avg generation throughput: 84.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix caINFO 01-04 14:08:17 [loggers.py:111] Engine 000: Avg prompt throughput: 263.8 tokenINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 112 |
+
INFO 01-04 14:08:23 [loggers.py:111] Engine 000: Avg prompt throughput: 140.0 tokens/s, Avg generation throughput: 76.1 tokens/s, RunninINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO 01-04 14:08:2INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 342.5 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix caINFO 01-04 14:08:37 [loggers.py:111] Engine 000: Avg prompt throughput: 571.2 tokenINFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 226.9 tokens/s, Avg generation throughput: 77.9 tokens/s, RunniINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 114 |
+
INFO 01-04 14:08:47 [loggers.py:111] Engine 000: Avg prompt throughput: 284.4 tokenINFO 01-04 14:08:53 [loggers.py:111] Engine 000: Avg prompt throughput: 323.4 tokens/s, Avg generation throughput: 76.2 tokens/s, RunninINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 115 |
+
INFO: 10.45.INFO 01-04 14:09:03 [loggers.py:111] Engine 000: Avg prompt throughput: 289.2 tokens/s, Avg generation throughput: 117.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 7.6%
|
| 116 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 117 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 118 |
+
INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 358.6 tokens/s, Avg generation throughput: 94.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cachINFO: 10.45.1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 119 |
+
INFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 74.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hINFO: 10.4INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 120 |
+
INFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 354.6 tokens/s, Avg generation throughput: 71.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cacheINFO 01-04 14:09INFO 01-04 14:09:43 [loggers.py:111] Engine 000: Avg prompt throughput: 771.7 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 8.5%
|
| 121 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTINFO 01-04 14:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 122 |
+
INFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 99.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hiINFO: 10.INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 123 |
+
INFO 01-04 14:10:03 [loggers.py:111] Engine 000: Avg prompt throughput: 815.3 tokens/s, Avg generation throughput: 58.9 tokens/s, Running: INFO 01-04 14:10:07 [loggers.py:111] Engine 000: Avg prompt throughput: 316.7 toINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 124 |
+
INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: INFO 01-04 14:10:17 [loggers.py:111] Engine 000: Avg prompt throughput: 458.3 toINFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 378.2 tokens/s, Avg generation throughput: 39.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hINFO 01-04 14:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokensINFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 383.2 tokens/s, Avg generation throughput: 33.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cacINFO 01-04 14:10:37INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 237.6 tokens/s, Avg generation throughput: 80.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 11.1%
|
| 125 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 126 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 127 |
+
INFO 01-04 14:10:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.4 tokens/s, INFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO 01-04 14:11:03 [loggers.py:111] Engine 000: Avg prompt throughput: 126.0 tokens/s, Avg generation throughput: 18.9 tokens/INFO 01-04 14:11:07 [loggers.py:111] Engine 000: Avg prompt throughput: 424.9 tokens/s, Avg INFO 01-04 14:11:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, PrefINFO: 10.46.17.192:0 INFO 01-04 14:11:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 11.0%
|
| 128 |
+
INFO 01-04 14:11:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 11.0%
|
| 129 |
+
INFO: 10.43.30.3:0 - "POST /v1/comINFO 01-04 14:11:47 [loggers.py:111] Engine 000: Avg prompt throughput: 405.3 tokens/s, Avg generation throughput: 26.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 7.7%
|
| 130 |
+
INFO 01-04 14:11:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 7.7%
|
| 131 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 132 |
+
INFO 01-04 14:12:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 7.7%
|
| 133 |
+
INFO 01-04 14:12:17 [loggers.py:111] Engine 000: Avg prompt throughput: 295.1 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 7.5%
|
| 134 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 135 |
+
INFO 01-04 14:12:27 [loggers.py:111] Engine 000: Avg prompt throughput: 199.6 tokens/s, Avg generation throughput: 77.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 7.4%
|
| 136 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 137 |
+
INFO 01-04 14:12:3INFO 01-04 14:12:37 [loggers.py:111] Engine 000: Avg prompt throughput: 302.2 tokens/s, Avg generation throughput: 17.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cacINFO 01-04 14:12:4INFO 01-04 14:12:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cacINFO: 10.45.19INFO: 10.46.17.192:0 - "POST /v1/completions INFO 01-04 14:12:INFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KINFO: 10.46.17.192:0 - "POSTINFO 01-04 14:13:INFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hINFO: 10.4INFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 392.7 tokens/s, Avg generation throughput: 17.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 10.0%
|
| 138 |
+
INFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 464.7 tokens/s, Avg generation throughput: 45.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 9.6%
|
| 139 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 140 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 141 |
+
INFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 54.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 9.6%
|
| 142 |
+
INFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 818.8 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 9.1%
|
| 143 |
+
INFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 9.1%
|
| 144 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 145 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 146 |
+
INFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 359.1 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 8.8%
|
| 147 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 148 |
+
INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 212.3 tokens/s, Avg generation throughput: 66.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 9.0%
|
| 149 |
+
INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 337.4 tokens/s, Avg generation throughput: 81.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 8.8%
|
| 150 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 151 |
+
INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 347.8 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 10.4%
|
| 152 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 384.5 tokens/s, Avg generation throughput: 124.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 10.1%
|
| 154 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 155 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 156 |
+
INFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 886.1 tokens/s, Avg generation throughput: 61.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 10.8%
|
| 157 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 158 |
+
INFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 569.5 tokens/s, Avg generation throughput: 71.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 10.5%
|
| 159 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 160 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTINFO: 10.45INFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 459.1 tokens/s, Avg generation throughput: 54.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 10.2%
|
| 161 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HINFO: 10.46.INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 458.6 tokens/s, Avg generation throughput: 48.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 12.4%
|
| 162 |
+
INFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg prompt throughput: 309.6 tokens/s, Avg generation throughput: 63.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 12.2%
|
| 163 |
+
INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 525.6 tokens/s, Avg generation throughput: 119.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, Prefix cache hit rate: 11.9%
|
| 164 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 165 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 166 |
+
INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 97.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 11.9%
|
| 167 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 168 |
+
INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 435.0 tokens/s, Avg generation throughput: 81.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 11.6%
|
| 169 |
+
INFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 701.2 tokens/s, Avg generation throughput: 78.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 11.2%
|
| 170 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
INFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 127.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 11.2%
|
| 172 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 173 |
+
INFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 654.4 tokens/s, Avg generation throughput: 127.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 10.8%
|
| 174 |
+
INFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 675.2 tokens/s, Avg generation throughput: 106.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:16:47 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 175 |
+
INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1014.1 tokens/s, Avg generation throughput: 164.8 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.0%, Prefix cache hit rate: 11.7%
|
| 176 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 177 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 178 |
+
INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 146.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 11.7%
|
| 179 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 180 |
+
INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 316.3 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:17:17 [loggers.py:111] Engine 000: AvgINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 181 |
+
INFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 556.7 tokens/s, Avg generation throughput: 106.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV INFO: 10.46.17.192:0 - "POST /v1/completionsINFO: 10.45.19INFO 01-04 14:17:33 [loggers.py:111] Engine 000: INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 964.4 tokens/s, Avg generation throughput: 105.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KVINFO: 10.46.17.192:0 - "POST /v1/completions INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 182 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 483.4 tokens/s, Avg generation throughput: 120.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KVINFO: 10.43.30.3:0 - "POST /v1/completions HTINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughput: 410.0 tokens/s, Avg generation throughput: 106.2 tokens/s, Running: 3 reqs, Waiting: 0 reqINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 184 |
+
INFO 01-04 14:18:07 [loggers.py:111] Engine 000: Avg prompt throughput: 394.1 tokens/s, Avg generation throughput: 153.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.9%, Prefix cache hit rate: 13.4%
|
| 185 |
+
INFO: 10.45INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughput: 551.9 tokens/s, Avg generation throughput: 122INFO 01-04 14:18:17 [loggers.py:111] Engine 000: Avg prompt throughput: 587.7 tokens/s, Avg generationINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 186 |
+
INFO 01-04 14:18:23 [loggers.py:111] Engine 000: AINFO 01-04 14:18:27 [loggers.py:111] Engine 000: Avg prompt throughput: 543.7 tokens/s, Avg generation throughput: 129.9 tokens/s, Running: 3 reqs, Waiting: 0 rINFO 01-04INFO: 10.43.30.5:0 - "POST /v1/completions HTTINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 187 |
+
INFO 01-04 14:18:37 [loggers.py:111] Engine 000: Avg prompt throughput: 539.9 tokens/s, Avg generation throughput: 158.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.4%, Prefix cache hit rate: 15.1%
|
| 188 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 189 |
+
INFO 01-04 14:18:47 [loggers.py:111] Engine 000: Avg prompt throughput: 241.8 tokens/s, Avg generation throughput: 156.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 15.0%
|
| 190 |
+
INFO 01-04 14:18:57 [loggers.py:111] Engine 000: Avg prompt throughput: 315.6 tokens/s, Avg generation throughput: 144.8 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.4%, Prefix cache hit rate: 14.8%
|
| 191 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 192 |
+
INFO 01-04 14:19:07 [loggers.py:111] Engine 000: Avg prompt throughput: 613.2 tokens/s, Avg generation throughput: 160.6 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 14.7%
|
| 193 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 194 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 195 |
+
INFO 01-04 14:19:17 [loggers.py:111] Engine 000: Avg prompt throughput: 282.8 tokens/s, Avg generation throughput: 153.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 14.5%
|
| 196 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 197 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 198 |
+
INFO 01-04 14:19:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 110.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 14.5%
|
| 199 |
+
INFO 01-04 14:19:37 [loggers.py:111] Engine 000: Avg prompt throughput: 577.7 tokens/s, Avg generation throughput: 61.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 14.2%
|
| 200 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 201 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 202 |
+
INFO 01-04 14:19:47 [loggers.py:111] Engine 000: Avg prompt throughput: 498.2 tokens/s, Avg generation throughput: 83.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughput: 451.6 tokens/s, Avg generation throughput: 93.0 INFO 01-04 14:19:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1566.2 tokens/s, Avg generation throughput: 77.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 203 |
+
INFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg genINFO 01-04 14:20:05 [loggeINFO 01-04 14INFO 01-04 14:20:07 [loggers.py:111] Engine 000: Avg prompt throughput: 606.0 tokens/s, Avg generation throughput: 8.4 tokens/s, Running: 1 reqs, WaiINFO 01-04 14:20:13 [loggers.pyINFO 01-04 14:20:15 [logINFO 01-04 14:20:17 [loggers.py:111] Engine 000: Avg prompt throughput: 631.6 tokens/s, Avg generation throughput: 35.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO: 10.43.30.5:0 - "POST INFO: 10.46.50.192:0INFO: INFO 01-04 14:20:23 [loggers.py:1INFO 01-04 14:20:25 [logINFO 01-04 14:20:27 [loggers.py:111] Engine 000: Avg prompt throughput: 813.4 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reINFO 01-04 14:20:33 [loggers.py:111INFO: 10.45.190.192:INFO 01-04 14:20:37 [loggers.py:111] Engine 000: Avg prompt throughput: 934.1 tokens/s, Avg generation throughput: 59.4 tokens/s, Running: 2 reqs, Waiting: 0 reINFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throughput: 385.3 tokens/s, Avg generation throughput: 125.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GINFO 01-04 14:20:45 [loggINFO 01-04 14:20:47 [loggers.py:111] Engine 000: Avg prompt throughput: 499.6 tokens/s, Avg genINFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 348.3 tokens/s, Avg generation throughput: 135.1 tokINFO: 1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1INFO 01-04 INFO 01-04 14:20:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 204 |
+
INFO: 10.45.190.192:0 - "POSINFO: 10.43.30.3:0 - INFO 01-04 14:21:07 [loggers.py:111] Engine 000: Avg prompt throughput: 1571.0 tokens/s, Avg generation tINFO 01-04 14:21:08 [loggers.py:111] Engine 000: Avg prompt throughput: 1191.6 tokens/s, Avg generation throughput:INFO 01-04 14:21:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:21:18 [loggers.py:111] Engine 00INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 205 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 206 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 207 |
+
INFO 01-04 14:21:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1640.0 tokens/s, Avg generation thrINFO 01-04 14:21:28 [loggers.py:111] Engine 000: Avg promptIINFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:21:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throuINFO 01-04 14:21:38 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:21:43 [loggers.py:111] Engine 000: AvgINFO: INFO 01-04 14:21:47 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:21:47 [loggers.py:111] Engine 000: Avg prompt throughput: 430.7 tokens/s, Avg generatioINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 208 |
+
INFO 01-04 14:21:57 [loggers.py:111] Engine 000: Avg prompt throughput: 741.4 tokens/s, Avg generatioINFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:22:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.8 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 649.5 tokens/s, Avg generation throughput: 108.5 tokensINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:22:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 62.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 18.0%
|
| 209 |
+
INFO 01-04 14:22:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1334.7 tokens/s, Avg generation throughput: 87.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.9%, Prefix cache hit rate: 17.4%
|
| 210 |
+
INFO 01-04 14:22:37 [loggers.py:111] Engine 000: Avg prompt throughput: 639.9 tokens/s, Avg generation throughput: 131.4 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 12.0%, Prefix cache hit rate: 17.1%
|
| 211 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 212 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 213 |
+
INFO 01-04 14:22:47 [loggers.py:111] Engine 000: Avg prompt throughput: 607.4 tokens/s, Avg generation throughput: 130.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs,INFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:22:57 [loggers.py:111] Engine 000: Avg prompt throughput: 396.3 tokens/s, Avg generation throughput: 130.5 tokens/s, Running: 4 reqs, Waiting: 0 reINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200INFOINFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 611.2 tokens/s, Avg gINFO 01-04 14:23:05 [loggers.py:11INFO 01-04 14:23:07 [loggers.py:111] Engine 000: Avg prompt throughput: 356.6 tokens/s, Avg INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 214 |
+
INFO 01-04 14:23:13 [loggeINFO: 10.46.17.192:0 - "POST /INFO 01-04 14:23:17 [loggers.py:111] Engine 000: Avg prompt throughput: 621.3 tokens/s, Avg generation throughput: 58.7 tokens/s, RINFO 01-04 14:23:17 [loggersINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 215 |
+
INFO 01-04 14:23:23 [logINFO 01-04 14:23:25 [loggers.py:11INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 216 |
+
INFO 01-04 14:23:27 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:23:27 [loggerINFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 474.7 tokens/s, Avg generation throughput: 51.2 toINFO 01-04 14:23:37 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:23:37 [loggerINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 217 |
+
INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:23:45 [loggersINFO 01-04 14:23:47 [loggers.py:111] Engine 000: Avg prompt throughput: 733.8 tokens/s, Avg generatINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 218 |
+
INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 414.2 tokens/s, Avg INFO 01-04 14:23:55 [loggers.pINFO 01-04 14:23:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 587.7 tokens/s, Avg generation throughput: 73.8INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 219 |
+
INFO 01-04 14:24:07 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:24:07 [loggers.py:INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 759.1 tokens/s, Avg generation throughput: 64.INFO 01-04 14:24:17 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:24:17 [loggers.py:111] Engine 000: Avg prompt throughput: 502.5 tokens/s, Avg generaINFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg INFO 01-04 14:24:27 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:24:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, WaitinINFO: 1INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg ININFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 220 |
+
INFO 01-04 14:24:37 [loggers.py:111] Engine 000: Avg prompt throughput: 744.9 tokens/s, Avg generINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 221 |
+
INFO 01-04 14:24:43INFO 01-04 14:24:45 [loggers.py:111INFO 01-04 14:24:47 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:24:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:24:53 [loggers.py:111] Engine 000: AvgINFO 01-04 14:24:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:24:57 [loggers.py:111] Engine 000: Avg prompt throughput: 197.1 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, WaiINFO: 10INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/ININFO 01-04 14:25:07 [loggers.py:111] Engine 000: Avg prompt throughput: 767.7 tokens/s, Avg generation throughput: 83.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 15INFO 01-04 14:25:17 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 222 |
+
INFO 01-04 14:25:17 [loggers.py:111] Engine 000: Avg prompt throughput: 645.8 tokensINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 223 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 224 |
+
ININFO 01-04 14:25:27 [loggers.py:111] Engine 000: Avg prompt throughput: 849.6 tokens/s, Avg generation throughput: 60.4 tokens/s, Running: 2 reqs, Waiting: 0 reINFO 01-04 14:25:33 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:25:37 [loggers.py:111] Engine 000: Avg prompt throughput: 698.2 tokens/s, Avg generation throughput: 21.3 tokens/s, RunniINFO 01-04 14:25:37 [loggerINFO 01-04 14:25:43 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:25:47 [loggers.py:111] Engine 000: Avg prompt throughput: 555.0 tokens/s, Avg generation throughput: 66.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 225 |
+
INFO 01-04 14:25:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 118.0 tokenIINFO 01-04 14:25:57 [loggers.py:111] Engine 000: Avg prompt throughput: 761.9 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 2 reqs, Waiting: 0 rINFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:26:07 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 226 |
+
INFO 01-04 14:26:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 227 |
+
INFO 01-04 14:26:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:26:17 [loggers.py:111] Engine 000: Avg prompt throughput: 774.4 tokens/s, Avg generatINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:26:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, RuINFO 01-04 14:26:37 [loggers.py:111] Engine 000: Avg prompt throughput: 944.1 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 228 |
+
INFO 01-04 14:26:47 [loggers.py:111] Engine 000: Avg prompt throughput: 943.7 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 16.9%
|
| 229 |
+
INFO: 10.43.30.4:0 - "INFO: 10.45.190.192:0 - "POST /v1/INFO 01-04 14:26:57 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:26:57 [loggers.py:111] Engine 000: Avg prompt throughput: 552.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:27:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.0%
|
| 230 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 231 |
+
INFO 01-04 14:27:17 [loggers.py:111] Engine 000: Avg prompt throughput: 832.9 tokens/s, Avg generation throughput: 38.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 17.7%
|
| 232 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 233 |
+
INFO 01-04 14:27:27 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:27:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 56.5 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:27:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunnINFO 01-04 14:27:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 14:27:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1181.9 tokens/s, Avg generation throughput: 41.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 17.3%
|
| 234 |
+
INFO 01-04 14:28:07 [loggers.py:111] Engine 000: Avg prompt throughput: 797.1 tokens/s, Avg generation throughput: 52.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.1%, Prefix cache hit rate: 17.0%
|
| 235 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 236 |
+
INFO 01-04 14:28:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 87.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 17.0%
|
| 237 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 238 |
+
INFO 01-04 14:28:27 [loggers.py:111] Engine 000: Avg prompt throughput: 642.9 tokens/s, Avg generation throughput: 7.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 16.8%
|
| 239 |
+
INFO 01-04 14:28:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 16.8%
|
| 240 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 241 |
+
INFO 01-04 14:28:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
|
| 242 |
+
INFO 01-04 14:28:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
|
| 243 |
+
INFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 706.4 tokens/s, Avg generation throughput: 26.5 tokens/s, RunINFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 1099.8 tokens/INFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunninINFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 244 |
+
INFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 714.1 tokens/s, Avg generation throughput: 53.9 tokens/s, RunINFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 972.9 tokens/sINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 245 |
+
INFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prompt throughput: 1500.7 tokens/s, Avg generation throughput: 32.1 tokens/s, RuINFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 246 |
+
INFO 01-04 14:29:57 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:29:57 [loggers.py:111] Engine 000: Avg prompt throughput: 452.7 tokens/s, Avg generation throughput: 15.5 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:30:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 16.6%
|
| 247 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 248 |
+
INFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt throughput: 516.8 tokens/s, Avg generation throughput: 13.5 tokens/s, Running: 1 reqs, WaiINFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, WaINFO 01-04 14:30:47 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:30:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, WaitiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 249 |
+
INFO 01-04 14:30:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1098.1 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.0%, Prefix cache hit rate: 14.INFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 49.9 tokens/s, Running: 1 reqs, WaitingINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 250 |
+
INFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt throughput: 903.7 tokens/s, Avg generation throughput: 5.7 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:31:27 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:31:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, WaiINFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:31:45 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:31:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.4 tokens/s, RunniINFO 01-04 14:31:47 [loggers.py:INFO 01-04 14:31:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:31:57 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:31:57 [loggers.py:1INFO 01-04 14:32:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:32:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:32:15 [loggers.py:111] Engine 000: Avg prompt throughput: 819.8 tokens/s, Avg generation throughput: INFO 01-04 14:32:27 [loggers.py:111] Engine 000: Avg prompt throughput: 471.4 tokens/s, Avg generation throughput: 11.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 17.0%
|
| 251 |
+
INFO 01-04 14:32:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, RunningINFO 01-04 14:32:57 [loggers.py:111] Engine 000: Avg prompt throughput: 832.5 tokens/s, Avg generation throughput: 43.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 13.8%
|
| 252 |
+
INFO 01-04 14:33:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 13.8%
|
| 253 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 254 |
+
INFO 01-04 14:33:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.8%
|
| 255 |
+
INFO 01-04 14:33:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.8%
|
| 256 |
+
INFO 01-04 14:33:47 [loggers.py:111] Engine 000: Avg prompt throughput: 849.0 tokens/s, Avg generation throughput: 6.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 14.9%
|
| 257 |
+
INFO 01-04 14:33:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 14.9%
|
| 258 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 259 |
+
INFO 01-04 14:34:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokINFO 01-04 14:34:27 [loggers.py:111] Engine 000: Avg prompt throughput: 579.9 tokens/s, Avg generation throughput: 22.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.3%
|
| 260 |
+
INFO 01-04 14:34:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.3%
|
| 261 |
+
INFO: 10.46.INFO 01-04 14:34:45 [loggers.py:111] Engine 000: AINFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV INFO 01-04 14:34:55 [loggers.py:111] Engine 000:INFO 01-04 14:34:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1370.5 tokens/s, Avg generation throughput: 39.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:34:59 [loggers.py:111] EngineINFO 01-04 14:35:16 [loggers.py:111] Engine 000: Avg prompt throughput: 518.8 tokens/s, Avg generation throughput: 2.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 18.5%
|
| 262 |
+
INFO 01-04 14:35:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.5%
|
| 263 |
+
INFO: 10.46.50.192:0INFO 01-04 14:35:45 [loggers.py:111] Engine 000: INFO 01-04 14:36:47 [loggers.py:111] Engine 000: Avg prompt throughput: 1461.8 tokens/s, Avg generation throughput: 9.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache hit rate: 19.9%
|
| 264 |
+
INFO 01-04 14:36:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 19.9%
|
| 265 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 266 |
+
INFO 01-04 14:37:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.9%
|
| 267 |
+
INFO 01-04 14:37:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:37:29 [loggers.py:111] Engine 000: AvgINFO 01-04 14:38:37 [loggers.py:111] Engine 000: Avg prompt throughput: 1114.2 tokens/s, Avg generation throughput: 3.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 20.5%
|
| 268 |
+
INFO 01-04 14:38:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 20.5%
|
| 269 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 270 |
+
INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
|
| 271 |
+
INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
|
| 272 |
+
INFO 01-04 14:40:17 [loggers.py:111] Engine 000: Avg prompt throughput: 603.6 tokens/s, Avg generation throughput: 35.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 20.3%
|
| 273 |
+
INFO 01-04 14:40:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 20.3%
|
| 274 |
+
INFO 01-04 14:40:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 20.3%
|
| 275 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 276 |
+
INFO 01-04 14:40:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.3%
|
| 277 |
+
INFO 01-04 14:40:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20INFOINFO 01-04 14:41:57 [loggers.py:111] Engine 000: Avg prompt throughput: 634.9 tokens/s, Avg generation throughput: 44.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 21.0%
|
| 278 |
+
INFO 01-04 14:42:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 21.0%
|
| 279 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 280 |
+
INFO 01-04 14:42:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 21.0%
|
| 281 |
+
INFO 01-04 14:42:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO 01-04 14:42:37 [loggers.py:111] Engine 000: Avg prompt throughput: 1122.6 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 1 INFO 01-04 14:43:07 [loggers.py:111] Engine 000: Avg prompt throughput: 726.2 tokens/s, Avg generation throughput: 20.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 20INFO 01-04 14:43:46 [loggers.py:111] Engine 000: Avg prompt throughput: 639.0 tokens/s, Avg generation throughput: 34.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.5%
|
| 282 |
+
INFO 01-04 1INFO 01-04 14:44:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1187.5 tokens/s, Avg generation throughput: 40.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hINFO 01-04 14:4INFO 01-04 14:44:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, RINFO 01-04 14:44:37 [loggers.py:111] Engine 000: Avg prompt throughput: 700.7 tokens/s, Avg genINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 283 |
+
INFO 01-04 14:45:19 [loggers.py:111] Engine 000: Avg prompt throughput: 1233.9 tokens/s, Avg generation throughput: 37.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, PrefINFO 01-04 14:48:27 [loggers.py:111] Engine 000: Avg prompt throughput: 481.7 tokens/s, Avg generation throughput: 39.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 20.6%
|
| 284 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 285 |
+
INFO 01-04 14:48:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.6%
|
| 286 |
+
INFO 01-04 14:48:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.6%
|
| 287 |
+
INFO 01-04 15:04:17 [loggers.py:111] Engine 000: Avg prompt throughput: 497.6 tokens/s, Avg generation throughput: 19.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 20.5%
|
| 288 |
+
INFO 01-04 15:04:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 20.5%
|
| 289 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 290 |
+
INFO 01-04 15:04:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 42.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
|
| 291 |
+
INFO 01-04 15:04:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
|
| 292 |
+
000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 10.6%
|
| 293 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 294 |
+
INFO 01-04 15:01:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.6%
|
| 295 |
+
INFO 01-04 15:01:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.6%
|
| 296 |
+
ration throughput: 29.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
|
| 297 |
+
INFO 01-04 15:01:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
|
| 298 |
+
ning: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.5%
|
| 299 |
+
INFO 01-04 15:01:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.5%
|
| 300 |
+
: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 14.7%
|
| 301 |
+
INFO 01-04 14:51:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 14.7%
|
| 302 |
+
INFO 01-04 14:51:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 14.7%
|
| 303 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 304 |
+
INFO 01-04 14:51:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.7%
|
| 305 |
+
IINFO 01-04 14:58:18 [loggers.py:111] Engine 000: Avg prompt throughput: 567.3 tokens/s, Avg generation throughput: 26.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.4%
|
| 306 |
+
INFO 01-04 14:58:28 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 16.4%
|
| 307 |
+
INFO 01-04 14:58:38 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 16.4%
|
| 308 |
+
INFO 01-04 14:58:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 16.4%
|
| 309 |
+
INFO 01-04 14:58:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 16.4%
|
| 310 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 311 |
+
INFO 01-04 14:59:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
|
| 312 |
+
INFO 01-04 14:59:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
|
hf_ip/vllm_gpu1.log
ADDED
|
@@ -0,0 +1,387 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:27 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:29 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:29 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8002, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:36 [config.py:717] This model supports multiple tasks: {'score', 'classify', 'reward', 'embed', 'generate'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:37 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:13:41 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:13:44 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:13:44 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7fdcbc82a650>
|
| 13 |
+
INFO 01-04 13:13:46 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:13:46 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:13:46 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:13:46 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 114.09 seconds
|
| 35 |
+
INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 114.594255 seconds
|
| 36 |
+
INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 37 |
+
INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.52 s
|
| 38 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
|
| 39 |
+
INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.09 s
|
| 40 |
+
INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.61 s in total
|
| 41 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 42 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
|
| 43 |
+
INFO 01-04 13:18:34 [gpu_model_runner.py:1686] Graph capturing finished in 40 secs, took 1.21 GiB
|
| 44 |
+
INFO 01-04 13:18:34 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.87 seconds
|
| 45 |
+
INFO 01-04 13:18:34 [core_client.py:439] Core engine process 0 ready.
|
| 46 |
+
WARNING 01-04 13:18:34 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 47 |
+
INFO 01-04 13:18:34 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 48 |
+
INFO 01-04 13:18:34 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 49 |
+
INFO 01-04 13:18:34 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8002
|
| 50 |
+
INFO 01-04 13:18:34 [launcher.py:28] Available routes are:
|
| 51 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
|
| 52 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /docs, Methods: HEAD, GET
|
| 53 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
|
| 54 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
|
| 55 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /health, Methods: GET
|
| 56 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /load, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /ping, Methods: POST, GET
|
| 58 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 61 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /version, Methods: GET
|
| 62 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /pooling, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /score, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /rerank, Methods: POST
|
| 70 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 71 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 72 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /invocations, Methods: POST
|
| 73 |
+
INFO 01-04 13:18:34 [launcher.py:36] Route: /metrics, Methods: GET
|
| 74 |
+
INFO: Started server process [1315813]
|
| 75 |
+
INFO: Waiting for application startup.
|
| 76 |
+
INFO: Application startup compINFO 0INFO 01-04 13:59:57 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:00:00 [loggers.py:111] Engine 000: Avg prompt throughput: 44.1 tokens/s, Avg generation throughput: 37.1 tokens/s, Running: 1 reqs, Waiting: INFO 0INFO 01-04 14:00:07 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:00:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:00:15 [loggers.py:111] Engine 000: Avg prompt thINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 77 |
+
INFO 01-04 14:00:20 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:00:25 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:00:27 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:00:30 [loggers.py:111] Engine 000: Avg prompt throughput: 48.6 tokens/s, Avg gINFO: 10.46.17.192:0 - "GET /v1/models HTTP/1.1" 200 OK
|
| 78 |
+
INFO: 1INFO 01-04 14:00:37 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:00:40 [loggers.py:111] Engine 000: Avg prompt throughput: 64.6 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 1.0%
|
| 79 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 80 |
+
INFO 01-04 14:00:50 [loggers.py:111] Engine 000: Avg prompt throughput: 60.4 tokens/s, Avg generation throughput: 97.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 0.7%
|
| 81 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 82 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 83 |
+
INFO 01-04 14:01:00 [loggers.py:111] Engine 000: Avg prompt throughput: 82.0 tokens/s, Avg generation throughput: 97.5 tokens/s, Running: 1 reqs, WaitINFO: INFO 01-04 14:01:07 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:01:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 1.1%IINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 84 |
+
INFO 01-04 14:01:20 [loggers.py:111] Engine 000: Avg prompt throughput: 85.1 tokens/s, Avg generation throughput: 75.3 tokens/s, Running: 1 reqs, WaitiINFINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-INFO 01-04 14:01:27 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:01:30 [loggers.py:111] Engine 000: Avg prompt throughput: 96.6 tokens/s, INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 86 |
+
INFO 01-04 14:01:40 [loggers.py:111] Engine 000: Avg prompt throughput: 66.1 tokens/s, Avg generation throughput: 84.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 0.6%
|
| 87 |
+
ININFO: INFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg pINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 88 |
+
INFO 01-04 14:01:50 [loggers.py:111] Engine 000: Avg prompt throughput: 96.2 tokens/s, Avg generINFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 89 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:02:00 [loggers.pyINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" INFO 01INFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:02:10 [loggers.py:111] Engine 000: Avg prompt throughput: 96.3 tokens/s, Avg genINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 90 |
+
INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 91 |
+
INFO 01-04 14:02:20 [loINFINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput: 96.0 tokens/s, Avg generation throughput: 86.9 tokens/sINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:02:30 [loggeINFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt throughput: 113.8 tokens/s, Avg generation throughput: 138.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, PINFO 01-04 14:02:40 [loggerINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 92 |
+
INFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughput: 133.4 tokens/s, Avg generation throughput: 181.1 tokeINFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughput: 62.3 tokens/s, Avg genINFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:02:50 INFO: INFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 107.0 tokeINFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:03:00 [loggers.py:111] Engine 000: Avg prompt throughput: 138.3 tokens/s, INFO 0INFO 01-04 14:03:07 [loggers.py:111] Engine 000: Avg promptINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OINFO 01-04 14:03:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:03:15 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:03:17 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:03:20 [loggers.py:111] Engine 000: Avg prompt throughput: 137.2 tokens/s, Avg generation throughput: 59.5 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:03:25 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:03:30 [loggers.py:111] Engine 000: Avg prompt throughput: 154.1 tokens/s, Avg generation throughput: 88.1 tokens/s, Running: 2 reqs, Waiting: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTPINFO 01-04 14:03:40 [loggers.py:111] Engine 000: Avg prompt throughput: 260.7 tokens/s, Avg generation throughput: 95.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 5.4%
|
| 93 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO 01-04 14:03:50 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 129.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 5.4%
|
| 95 |
+
ININFO: INFO 01-04 14:03:57 [loggers.py:111] Engine 000: AvgINFO 01-04 14:04:00 [loggers.py:111] Engine 000: Avg prompt throughput: 155.5 tokens/s, Avg generation throughput: 70.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/INFO: 10INFO 01-04 14:04:07 [loggers.py:111] Engine 000: AvgINFO 01-04 14:04:10 [loggers.py:111] Engine 000: Avg prompt throughput: 111.4 tokens/s, Avg generation throughput: 88.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO: INFINFO 01-04 14:04:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:04:17 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:04:20 [loggers.py:111] Engine 000: Avg prompt throughput: 159.6 tokens/s, Avg generatiINFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg prompt throughput: 154.5 tokens/s, Avg generation throughput: 78.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:04:30 [loggers.py:11INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 96 |
+
INFO 01-04 14:04:37 [loggers.py:111] Engine 000: Avg prompt throughput: 169.7 tokens/s, Avg generation throughput: 116.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:04:40 [loggers.py:11INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/INFO 01-04 1INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 97 |
+
INFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:04:50 [loggers.py:111] Engine 000: Avg prompt throughput: 107.0 tokens/s,INFO 01-04INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OINFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:05:00 [loggers.py:111] Engine 000: Avg prompt throughput: 314.6 tokens/s,INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 98 |
+
INFO 01-04 INFO 01-04 14:05:07 [loggers.py:111] Engine 000: Avg prompt throughput: 336.8 tokens/s, Avg generation throughput: 42.1 toINFO 01-04 14:05:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:05:17 [loggers.py:111] Engine 000: Avg prompt throughput: 164.8 tokens/s, Avg generation throughput: 98.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, PrefixINFO 01-04 14:05:17 [lINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 99 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1INFO 01-04 INFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 227.0 tokens/s, Avg generation throughput: 98.1 tINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 100 |
+
INFO 01-04 14:05:30 [INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 101 |
+
INFO: 10INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:05:37 [loggers.py:111] Engine 000: AvINFO 01-04 14:05:40 [loggers.py:111] Engine 000: Avg prompt throughput: 322.2 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 2 reqs, Waiting: INFO 01-0INFO 01-04 14:05:47 [loggers.py:111] Engine 000: Avg INFO 01-04 14:05:50 [loggers.py:111] Engine 000: Avg prompt throughput: 193.6 tokens/s, Avg generation throughput: 101.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, INFO 01-04 14:05:57 [loggers.py:111] Engine 000: Avg prompt throughput: 176.7 tokens/s, Avg generation throughput: 80.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO: 10.43.30.3:0 - "POST /vINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 102 |
+
INFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 274.3 tokens/s, Avg generation throughput: 132.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:06:10 [loggers.py:111] INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 103 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 104 |
+
INFO 01-04 14:06:17 [loggers.py:111] Engine 000: Avg INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO 01-04 14:06:20 [loggers.py:111] Engine 000: Avg prompt throughput: 317.3 tokens/s, Avg generatINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 106 |
+
INFO 01-04 14:06:27 [loggers.py:111] Engine 000: Avg prompt throughput: 275.0 tokens/s, Avg generation throughput: 74.9 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 107 |
+
INFO 01-04 14:06:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO INFO 01-04 14:06:37 [loggers.py:111] Engine 000: Avg prompt throughput: 305.9 tokens/s, Avg generation throughput: 49.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usaINFO INFO 01-04 14:06:40 [loggers.py:1INFO 01-04 14:06:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO: 10.43.30.5:0 - "POST /vINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 20INFO INFO 01-04 14:06:57 [loggers.py:111] Engine 000: Avg prompt throughput: 461.4 tokens/s, Avg generation throughput: 113.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:07:00 [loggers.py:111] Engine 000: Avg prompt throughput: 143.9 tokens/s, Avg generatINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 108 |
+
INFO 0INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 109 |
+
INFO 01-04 14:07:07 [loggers.py:111] Engine 000: AvINFO 01-04 14:07:10 [loggers.py:111] Engine 000: Avg prompt throughput: 247.3 tokens/s, Avg generation throughput: 63.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GININFO 01-04 14:07:17 [loggers.py:111] Engine 000: AvINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 110 |
+
INFO 01-04 14:07:20 [loggers.py:111] Engine 000: Avg prompt throughput: 267.1 tokens/s, Avg generatINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 111 |
+
INFO 01-04 14:07:27 [loggers.py:111] Engine 000: Avg proINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 112 |
+
INFO 01-04 14:07:30 [loggers.py:111] Engine 000: Avg prompt throughput: 195.5 tokens/s, Avg genINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO 01-04 14:07:37 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:07:40 [loggers.py:111] Engine 000: Avg prompt throughput: 253.5 tokens/s, Avg generation throughput: 110.5 tokens/s, Running: 3 reqs, Waiting: 0 INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 114 |
+
INFO 01-04 14:07:50 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 116.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 1.9%
|
| 115 |
+
INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 116 |
+
INFO 01-04 14:08:00 [loggers.py:111] Engine 000: Avg prompt throughput: 624.6 tokens/s, Avg generation throughput: 109.3 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg promINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 117 |
+
INFO 01-04 14:08:10 [loggers.py:111] Engine 000: Avg prompt throughput: 317.2 tokens/s, Avg genINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 118 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 119 |
+
INFO 01-04 14:08:17 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:08:20 [loggers.py:111] Engine 000: Avg prompt throughput: 283.7 tokens/s, Avg generation throughput: 97.8 tokens/s, Running: 2 reqs, Waiting: 0 reINFO 01-04 14:08:27 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:08:30 [loggers.py:111] Engine 000: Avg prompt throughput: 362.9 tokens/s, Avg generation throughput: 97.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs,INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1INFO: INFO 01-04 14:08:37 [loggers.py:111] Engine 000: Avg prompt throughput: 220.4 tokens/s, Avg generation throughput: 32.8 tokens/INFO 01-04 14:08:40 [loggers.py:111] Engine 000: Avg prompt throughput: 271.7 tokens/s, AvgINFINFO 01-04 14:08:44 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:08:47 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:08:50 [loggers.py:111] Engine 000: Avg prompt throughput: 302.4 tokens/s, Avg generation throughput: 97.2 tokens/s, Running: 3 reqs, WaitingINFO 01-04 14:08:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.6 tokens/s, RunINFO 01-04 14:09:00 [loggers.py:111] Engine 000: Avg prompt throughput: 295.0 tokens/INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 120 |
+
INFO 01-04 14:09:07 [loggers.py:111] Engine 000: Avg prompt throughput: 445.3 tokens/s, Avg generation throughput: 93.3 tokens/s, RINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 121 |
+
INFO 01-04 14:09:10 [INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 122 |
+
INFO 01-04 14:09:17 [loggers.py:111] Engine 000: Avg prompt throughput: 270.6 tokens/s, Avg generation throughput: 73.4 tokens/s, RINFO 01-04 14:09:20 [loggers.py:111] Engine 000: Avg prompt throughput: 412.2 tokens/s,INFO 01-04 14:09:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cINFO 01-04 14:09:30 [loggers.py:111] Engine 000: Avg prompt throughput: 278.8 tokens/sINFO 01-04 14:09:37 [loggers.py:111] Engine 000: Avg prompt throughput: 119.1 tokens/s, Avg generation throughput: 77.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix caINFO 01-04 14:09:40INFO 01-04 14:09:47 [loggers.py:111] Engine 000: Avg prompt throughput: 678.1 tokens/s, Avg generation throughput: 133.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 10.2%
|
| 123 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 124 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 125 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 126 |
+
INFO 01-04 14:09:57 [loggers.py:111] Engine 000: Avg prompt throughput: 361.0 tokens/s, Avg generation throughput: 108.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 9.8%
|
| 127 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 128 |
+
INFO: INFO 01-04 14:10:04 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:10:07 [loggers.py:111] Engine 000: Avg prompt throughput: 365.5 tokens/s, Avg generation throughput: 52.3 tokens/s, RuINFO: 10.45.190.192:0 - INFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:10:10 INFO 01-04 14:10:17 [loggers.py:111] Engine 000: Avg prompt throughput: 292.4 tokens/s, Avg generation throughput: 60.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cINFO: 10.46.17.19INFO 01-04 14:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 12.2%
|
| 129 |
+
INFO: INFO 01-04 14:10:34 [loggers.py:111] Engine 000: Avg prompt throughput: 226.3 tokens/s, Avg generation throughput: 114.3 tokINFO 01-04 14:10:37 [loggers.py:111] Engine 000: Avg prompt throughput: 336.0 tokens/s, Avg genINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 130 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 131 |
+
INFO 01-04 14:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cINFO 01-04 14:10:50 [INFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughput: 306.9 tokens/s, Avg generation throughput: 11.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix INFO: 10.46.50.192INFO 01-04 14:11:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 11.5%
|
| 132 |
+
INFO: 10.46.50.192:0 - "POST /v1/compleINFO 01-04 14:11:10 [loINFO 01-04 14:11:17 [loggers.py:111] Engine 000: Avg prompt throughput: 384.9 tokens/s, Avg generation throughput: 44.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-INFO 01-04 14:11:20 [loggerINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKIINFO 01-04 14:11:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usageINFO 01-04 1INFO 01-04 14:11:30 [lINFO 01-04 14:11:44 [loggers.py:111] Engine 000: Avg prompt throughput: 327.4 tokens/s, Avg generation throughput: 8.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 8.8%
|
| 133 |
+
INFO 01-04 14:11:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, PrefINFO 01-04 14:12:00 [loggers.py:111] Engine 000: Avg prompt throughput: 442.4 tokens/s, AvgINFO 01-04 14:12:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.4 tokens/s, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 134 |
+
INFO 01-04 14:12:10 [INFO 01-04 14:12:14 [loggers.py:111] Engine 000: Avg prompt throughput: 271.8 tokens/s, Avg generation throughput: 22.1 tokINFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prompt throughput: 277.1 tokens/s, Avg generation throughput: 45.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 11.5%
|
| 135 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 136 |
+
INFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt throughput: 273.1 tokens/s, Avg generation throughput: 2.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 11.3%
|
| 137 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 138 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 139 |
+
INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 180.5 tokens/s, Avg generation throughput: 50.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cacheINFO: 10.45.190.192:0 - "POST /v1/complINFO 01-04 14:12:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.1%
|
| 140 |
+
INFO 01-04 14:13:16 [loggers.py:111] Engine 000: Avg prompt throughput: 340.2 tokens/s, Avg generation throughput: 33.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 10.8%
|
| 141 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 142 |
+
INFO 01-04 14:13:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 42.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.8%
|
| 143 |
+
INFO 01-04 14:13:36 [loggers.py:111] Engine 000: Avg prompt throughput: 713.2 tokens/s, Avg generation throughput: 45.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 10.2%
|
| 144 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 145 |
+
INFO 01-04 14:13:46 [loggers.py:111] Engine 000: Avg prompt throughput: 282.3 tokens/s, Avg generation throughput: 99.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 10.0%
|
| 146 |
+
INFO 01-04 14:13:56 [loggers.py:111] Engine 000: Avg prompt throughput: 382.0 tokens/s, Avg generation throughput: 106.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 11.8%
|
| 147 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 148 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 149 |
+
INFO 01-04 14:14:06 [loggers.py:111] Engine 000: Avg prompt throughput: 532.0 tokens/s, Avg generation throughput: 65.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 11.4%
|
| 150 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 151 |
+
INFO 01-04 14:14:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 51.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 11.4%
|
| 152 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO 01-04 14:14:26 [loggers.py:111] Engine 000: Avg prompt throughput: 743.0 tokens/s, Avg generation throughput: 59.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 10.8%
|
| 154 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 155 |
+
INFO 01-04 14:14:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 59.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 10.8%
|
| 156 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 157 |
+
INFO 01-04 14:14:46 [loggers.py:111] Engine 000: Avg prompt throughput: 614.5 tokens/s, Avg generation throughput: 49.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 10.4%
|
| 158 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 159 |
+
INFO 01-04 14:14:56 [loggers.py:111] Engine 000: Avg prompt throughput: 413.1 tokens/s, Avg generation throughput: 38.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 10.1%
|
| 160 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 161 |
+
INFO 01-04 14:15:06 [loggers.py:111] Engine 000: Avg prompt throughput: 872.1 tokens/s, Avg generation throughput: 74.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 9.9%
|
| 162 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 163 |
+
INFO 01-04 14:15:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 50.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 9.9%
|
| 164 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 165 |
+
INFO 01-04 14:15:26 [loggers.py:111] Engine 000: Avg prompt throughput: 535.4 tokens/s, Avg generation throughput: 55.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 9.6%
|
| 166 |
+
INFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 417.3 tokens/s, Avg generation throughput: 68.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 9.4%
|
| 167 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 168 |
+
INFO 01-04 14:15:46 [loggers.py:111] Engine 000: Avg prompt throughput: 550.4 tokens/s, Avg generation throughput: 79.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 9.1%
|
| 169 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 170 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
INFO 01-04 14:15:56 [loggers.py:111] Engine 000: Avg prompt throughput: 605.5 tokens/s, Avg generation throughput: 68.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 11.6%
|
| 172 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 173 |
+
INFO 01-04 14:16:06 [loggers.py:111] Engine 000: Avg prompt throughput: 303.1 tokens/s, Avg generation throughput: 26.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 11.4%
|
| 174 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 175 |
+
INFO 01-04 14:16:16 [loggers.py:111] Engine 000: Avg prompt throughput: 463.2 tokens/s, Avg generation throughput: 53.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 11.2%
|
| 176 |
+
INFO 01-04 14:16:26 [loggers.py:111] Engine 000: Avg prompt throughput: 362.6 tokens/s, Avg generation throughput: 59.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 11.0%
|
| 177 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 178 |
+
INFO 01-04 14:16:36 [loggers.py:111] Engine 000: Avg prompt throughput: 604.3 tokens/s, Avg generation throughput: 64.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 11.8%
|
| 179 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 180 |
+
INFO 01-04 14:16:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 84.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 11.8%
|
| 181 |
+
INFO 01-04 14:16:56 [loggers.py:111] Engine 000: Avg prompt throughput: 576.9 tokens/s, Avg generation throughput: 82.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 11.4%
|
| 182 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 184 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 185 |
+
INFO 01-04 14:17:06 [loggers.py:111] Engine 000: Avg prompt throughput: 477.4 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.1%
|
| 186 |
+
INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 929.6 tokens/s, Avg generation throughput: 57.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 14.3%
|
| 187 |
+
INFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 14.3%
|
| 188 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 189 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 190 |
+
INFO 01-04 14:17:36 [loggers.py:111] Engine 000: Avg prompt throughput: 633.9 tokens/s, Avg generation throughput: 68.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.5%
|
| 191 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 192 |
+
INFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 708.9 tokens/s, Avg generation throughput: 40.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 16.0%
|
| 193 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 194 |
+
INFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 520.8 tokens/s, Avg generation throughput: 29.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.7%
|
| 195 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 196 |
+
INFO 01-04 14:18:06 [loggers.py:111] Engine 000: Avg prompt throughput: 496.4 tokens/s, Avg generation throughput: 33.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.4%
|
| 197 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 198 |
+
INFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 407.0 tokens/s, Avg generation throughput: 68.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 16.6%
|
| 199 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 200 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 201 |
+
INFO 01-04 14:18:26 [loggers.py:111] Engine 000: Avg prompt throughput: 524.0 tokens/s, Avg generation throughput: 49.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.3%
|
| 202 |
+
INFO 01-04 14:18:36 [loggers.py:111] Engine 000: Avg prompt throughput: 412.6 tokens/s, Avg generation throughput: 16.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 16.5%
|
| 203 |
+
INFO 01-04 14:18:46 [loggers.py:111] Engine 000: Avg prompt throughput: 741.7 tokens/s, Avg generation throughput: 74.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 16.0%
|
| 204 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 205 |
+
INFO 01-04 14:18:56 [loggers.py:111] Engine 000: Avg prompt throughput: 602.2 tokens/s, Avg generation throughput: 104.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 17.2%
|
| 206 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 207 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 208 |
+
INFO 01-04 14:19:06 [loggers.py:111] Engine 000: Avg prompt throughput: 637.4 tokens/s, Avg generation throughput: 51.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.6%
|
| 209 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 210 |
+
INFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 353.2 tokens/s, Avg generation throughput: 38.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 17.4%
|
| 211 |
+
INFO 01-04 14:19:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 17.4%
|
| 212 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 213 |
+
INFO 01-04 14:19:36 [loggers.py:111] Engine 000: Avg prompt throughput: 511.9 tokens/s, Avg generation throughput: 57.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 17.1%
|
| 214 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 215 |
+
INFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1315.0 tokens/s, Avg generation throughput: 56.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 17.8%
|
| 216 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 217 |
+
INFO 01-04 14:19:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 81.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.8%
|
| 218 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 219 |
+
INFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput: 721.5 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.4%
|
| 220 |
+
INFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 864.7 tokens/s, Avg generation throughput: 75.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cache hit rate: 16.9%
|
| 221 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 222 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 223 |
+
INFO 01-04 14:20:26 [loggers.py:111] Engine 000: Avg prompt throughput: 616.5 tokens/s, Avg generation throughput: 80.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.6%
|
| 224 |
+
INFO 01-04 14:20:36 [loggers.py:111] Engine 000: Avg prompt throughput: 670.8 tokens/s, Avg generation throughput: 65.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 16.3%
|
| 225 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 226 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 227 |
+
INFO 01-04 14:20:46 [loggers.py:111] Engine 000: Avg prompt throughput: 454.9 tokens/s, Avg generation throughput: 95.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.9%
|
| 228 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 229 |
+
INFO 01-04 14:20:56 [loggers.py:111] Engine 000: Avg prompt throughput: 554.7 tokens/s, Avg generation throughput: 67.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.6%
|
| 230 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 231 |
+
INFO 01-04 14:21:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.6%
|
| 232 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 233 |
+
INFO 01-04 14:21:16 [loggers.py:111] Engine 000: Avg prompt throughput: 704.4 tokens/s, Avg generation throughput: 29.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
|
| 234 |
+
INFO 01-04 14:21:26 [loggers.py:111] Engine 000: Avg prompt throughput: 319.7 tokens/s, Avg generation throughput: 30.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 18.5%
|
| 235 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 236 |
+
INFO 01-04 14:21:36 [loggers.py:111] Engine 000: Avg prompt throughput: 397.9 tokens/s, Avg generation throughput: 57.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 19.3%
|
| 237 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 238 |
+
INFO 01-04 14:21:46 [loggers.py:111] Engine 000: Avg prompt throughput: 692.2 tokens/s, Avg generation throughput: 44.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 18.9%
|
| 239 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 240 |
+
INFO 01-04 14:21:56 [loggers.py:111] Engine 000: Avg prompt throughput: 436.7 tokens/s, Avg generation throughput: 53.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 18.8%
|
| 241 |
+
INFO 01-04 14:22:06 [loggers.py:111] Engine 000: Avg prompt throughput: 502.7 tokens/s, Avg generation throughput: 65.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 18.6%
|
| 242 |
+
INFO 01-04 14:22:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 18.6%
|
| 243 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 244 |
+
INFO 01-04 14:22:26 [loggers.py:111] Engine 000: Avg prompt throughput: 768.5 tokens/s, Avg generation throughput: 102.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 18.2%
|
| 245 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 246 |
+
INFO 01-04 14:22:36 [loggers.py:111] Engine 000: Avg prompt throughput: 481.2 tokens/s, Avg generation throughput: 56.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 18.0%
|
| 247 |
+
INFO 01-04 14:22:46 [loggers.py:111] Engine 000: Avg prompt throughput: 699.0 tokens/s, Avg generation throughput: 91.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.4%, Prefix cache hit rate: 18.9%
|
| 248 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 249 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 250 |
+
INFO 01-04 14:22:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 18.9%
|
| 251 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 252 |
+
INFO 01-04 14:23:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 27.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
|
| 253 |
+
INFO 01-04 14:23:16 [loggers.py:111] Engine 000: Avg prompt throughput: 585.8 tokens/s, Avg generation throughput: 29.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 18.6%
|
| 254 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 255 |
+
INFO 01-04 14:23:26 [loggers.py:111] Engine 000: Avg prompt throughput: 399.2 tokens/s, Avg generation throughput: 29.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 18.4%
|
| 256 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 257 |
+
INFO 01-04 14:23:36 [loggers.py:111] Engine 000: Avg prompt throughput: 689.4 tokens/s, Avg generation throughput: 44.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.1%
|
| 258 |
+
INFO 01-04 14:23:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 18.1%
|
| 259 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 260 |
+
INFO 01-04 14:23:56 [loggers.py:111] Engine 000: Avg prompt throughput: 946.6 tokens/s, Avg generation throughput: 70.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 17.7%
|
| 261 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 262 |
+
INFO 01-04 14:24:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 17.7%
|
| 263 |
+
INFO 01-04 14:24:16 [loggers.py:111] Engine 000: Avg prompt throughput: 344.0 tokens/s, Avg generation throughput: 50.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 17.6%
|
| 264 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 265 |
+
INFO 01-04 14:24:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 62.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 17.6%
|
| 266 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 267 |
+
INFO 01-04 14:24:36 [loggers.py:111] Engine 000: Avg prompt throughput: 782.9 tokens/s, Avg generation throughput: 40.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 18.5%
|
| 268 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 269 |
+
INFO 01-04 14:24:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.5%
|
| 270 |
+
INFO 01-04 14:24:56 [loggers.py:111] Engine 000: Avg prompt throughput: 661.2 tokens/s, Avg generation throughput: 35.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.2%
|
| 271 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 272 |
+
INFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
|
| 273 |
+
INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1472.3 tokens/s, Avg generation throughput: 43.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 17.6%
|
| 274 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 275 |
+
INFO 01-04 14:25:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 17.6%
|
| 276 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 277 |
+
INFO 01-04 14:25:36 [loggers.py:111] Engine 000: Avg prompt throughput: 932.1 tokens/s, Avg generation throughput: 54.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 17.3%
|
| 278 |
+
INFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 567.9 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 17.0%
|
| 279 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 280 |
+
INFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.0%
|
| 281 |
+
INFO 01-04 14:26:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1074.1 tokens/s, Avg generation throughput: 52.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.9%, Prefix cache hit rate: 18.2%
|
| 282 |
+
INFO 01-04 14:26:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.2%, Prefix cache hit rate: 18.2%
|
| 283 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 284 |
+
INFO 01-04 14:26:26 [loggers.py:111] Engine 000: Avg prompt throughput: 827.6 tokens/s, Avg generation throughput: 62.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 17.9%
|
| 285 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 286 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 287 |
+
INFO 01-04 14:26:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.9%
|
| 288 |
+
INFO 01-04 14:26:46 [loggers.py:111] Engine 000: Avg prompt throughput: 864.0 tokens/s, Avg generation throughput: 23.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 17.6%
|
| 289 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 290 |
+
INFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 42.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.6%
|
| 291 |
+
INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 763.8 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 17.3%
|
| 292 |
+
INFO 01-04 14:27:16 [loggers.py:111] Engine 000: Avg prompt throughput: 522.4 tokens/s, Avg generation throughput: 49.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 17.9%
|
| 293 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 294 |
+
INFO 01-04 14:27:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 64.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 17.9%
|
| 295 |
+
INFO 01-04 14:27:36 [loggers.py:111] Engine 000: Avg prompt throughput: 788.4 tokens/s, Avg generation throughput: 76.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 17.6%
|
| 296 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 297 |
+
INFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throughput: 528.2 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 17.5%
|
| 298 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 299 |
+
INFO 01-04 14:27:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 17.5%
|
| 300 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 301 |
+
INFO 01-04 14:28:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.5%
|
| 302 |
+
INFO 01-04 14:28:16 [loggers.py:111] Engine 000: Avg prompt throughput: 840.4 tokens/s, Avg generation throughput: 40.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 18.6%
|
| 303 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 304 |
+
INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1025.3 tokens/s, Avg generation throughput: 30.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 19.9%
|
| 305 |
+
INFO 01-04 14:28:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 19.9%
|
| 306 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 307 |
+
INFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 11.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.9%
|
| 308 |
+
INFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throughput: 907.3 tokens/s, Avg generation throughput: 8.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 19.6%
|
| 309 |
+
INFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 19.6%
|
| 310 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 311 |
+
INFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.6%
|
| 312 |
+
INFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.6%
|
| 313 |
+
INFO 01-04 14:29:36 [loggers.py:111] Engine 000: Avg prompt throughput: 721.0 tokens/s, Avg generation throughput: 18.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 19.4%
|
| 314 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 315 |
+
INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
|
| 316 |
+
INFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
|
| 317 |
+
INFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 608.1 tokens/s, Avg generation throughput: 37.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 19.1%
|
| 318 |
+
INFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 19.1%
|
| 319 |
+
INFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 19.1%
|
| 320 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 321 |
+
INFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.1%
|
| 322 |
+
INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 773.2 tokens/s, Avg generation throughput: 42.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.9%
|
| 323 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 324 |
+
INFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
|
| 325 |
+
INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1030.2 tokens/s, Avg generation throughput: 14.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 18.6%
|
| 326 |
+
INFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 18.6%
|
| 327 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 328 |
+
INFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
|
| 329 |
+
INFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
|
| 330 |
+
INFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 591.5 tokens/s, Avg generation throughput: 30.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 18.4%
|
| 331 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 332 |
+
INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
|
| 333 |
+
INFO 01-04 14:32:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
|
| 334 |
+
INFO 01-04 14:32:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1263.6 tokens/s, Avg generation throughput: 29.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 19.3%
|
| 335 |
+
INFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 19.3%
|
| 336 |
+
INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 19.3%
|
| 337 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 338 |
+
INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 838.5 tokens/s, Avg generation throughput: 56.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 19.0%
|
| 339 |
+
INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 19.0%
|
| 340 |
+
INFO: 10.43INFO 01-04 14:33:48 [loggers.py:111] Engine 000: Avg prompt throughput: 558.6 tokens/s, Avg generation throughput: 22.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 15.1%
|
| 341 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 342 |
+
INFO 01-04 14:33:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cacINFO 01-04 14:34:16 [loggers.py:111] Engine 000: Avg prompt throughput: 757.7 tokens/s, Avg generation throughput: 15.4 tokens/s,INFO 01-04 14:34:27 [loggers.py:111] Engine 000: Avg prompt throughput: 697.8 tokens/s, Avg generation throughput: 19.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.5%
|
| 343 |
+
INFO 01-04 14:34:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 16.5%
|
| 344 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 345 |
+
INFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
|
| 346 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 347 |
+
INFO 01-04 14:34:57 [lINFO 01-04 14:35:26 [loggers.py:111] Engine 000: Avg prompt throughput: 870.9 tokens/s, Avg generation throughput: 6.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 18.5%
|
| 348 |
+
INFO 01-04 14:35:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix INFO 01-04 14:37:07 [loggers.py:111] Engine 000: Avg prompt throughput: 453.7 tokens/s, Avg generation throughput: 7.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 16.2%
|
| 349 |
+
INFO 01-04 14:37:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 16.2%
|
| 350 |
+
INFO 01-04 14:37:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 16.2%
|
| 351 |
+
INFO 01-04 14:37:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.2%
|
| 352 |
+
INFO 01-04 14:37:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.2%
|
| 353 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 354 |
+
INFO 01-04 14:37:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.2%
|
| 355 |
+
INFO 01-04 14:38:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.2%
|
| 356 |
+
INFO 01-04 14:38:47 [loggers.py:111] Engine 000: Avg prompt throughput: 632.5 tokens/s, Avg generation throughput: 24.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.0%
|
| 357 |
+
INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.0%
|
| 358 |
+
INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 16.0%
|
| 359 |
+
INFO 01-04 14:39:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 16.0%
|
| 360 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 361 |
+
INFO 01-04 14:39:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.0INFO 01-04 14:39:44 [loggers.py:111] Engine 000: Avg prompt throughput: 657.7 tokens/s, Avg generation throughput: 28.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 10.8%INFO 01-04 14:40:17 [loggers.py:111] Engine 000: Avg prompt throughput: 901.7 tokens/s, Avg generation throughput: 25.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 15.8%
|
| 362 |
+
INFO 01-04 14:40:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:41:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1032.5 tokens/s, Avg generation throughput: 38.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 18.4%
|
| 363 |
+
INFO 01-04 14:41:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 18.4%
|
| 364 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 365 |
+
INFO 01-04 14:41:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
|
| 366 |
+
INFO 01-04 14:41:56 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:41:57 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:42:44 [loggers.py:111] Engine 000: Avg prompt throughput: 657.9 tokens/s, AINFO 01-04 14:44:56 [loggers.py:111] Engine 000: Avg prompt throughput: 719.8 tokens/s, Avg generation throughput: 41.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 18.2%
|
| 367 |
+
INFO 01-04 14:45:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.2%
|
| 368 |
+
INFO 01-04 14:45:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, PrefiINFO 01-04 14:46:44 [loggers.py:111] Engine 000: Avg prompt throughput: 1221.5 tokens/s, Avg generation throughput: 36.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 11.1%
|
| 369 |
+
INFO 01-04 14:46:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 11.1%
|
| 370 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 371 |
+
INFO 01-04 14:47:04 INFO 01-04 14:48:56 [loggers.py:111] Engine 000: Avg prompt throughput: 377.9 tokens/s, Avg generation throughput: 35.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 18.1%
|
| 372 |
+
INFO 01-04 14:49:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, PrefixINFO 01-04 14:51:44 [loggers.py:111] Engine 000: Avg prompt throughput: 892.0 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 11.0%
|
| 373 |
+
INFO 01-04 14:51:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 11.0%
|
| 374 |
+
INFO 01-04 14:52:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 11.0%
|
| 375 |
+
INFO 01-04 14:52:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 15:01:06 [loggers.py:111] Engine 000: Avg prompt throughput: 476.6 tokens/s, Avg generation throughput: 9.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 18.5%
|
| 376 |
+
INFO 01-04 15:01:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, RunINFO 01-04 15:02:05 [loggers.py:111] Engine 000: Avg prompt throughput: 481.1 tokens/s, Avg generation throughput: 22.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 17.INFO 01-04 15:09:17 [loggers.py:111] Engine 000: Avg prompt throughput: 542.4 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 15.6%
|
| 377 |
+
INFO 01-04 15:09:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.6%
|
| 378 |
+
INFO 01-04 15:09:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 15.6%
|
| 379 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 380 |
+
INFO 01-04 15:09:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 29.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.6%
|
| 381 |
+
INFO 01-04 15:09:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.6%
|
| 382 |
+
5:04:28 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.2%
|
| 383 |
+
INFO 01-04 15:04:38 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 18.2%
|
| 384 |
+
INFO 01-04 15:04:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 18.2%
|
| 385 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 386 |
+
INFO 01-04 15:04:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 27.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
|
| 387 |
+
INFO 01-04 15:05:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
|
hf_ip/vllm_gpu2.log
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:30 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:32 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:32 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8003, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:39 [config.py:717] This model supports multiple tasks: {'reward', 'score', 'classify', 'generate', 'embed'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:40 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:13:44 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:13:47 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:13:47 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7fbf15272710>
|
| 13 |
+
INFO 01-04 13:13:49 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:13:49 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:13:49 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:13:49 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 111.25 seconds
|
| 35 |
+
INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 111.510422 seconds
|
| 36 |
+
INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 37 |
+
INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.56 s
|
| 38 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
|
| 39 |
+
INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.12 s
|
| 40 |
+
INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.68 s in total
|
| 41 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 42 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
|
| 43 |
+
INFO 01-04 13:18:33 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
|
| 44 |
+
INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.10 seconds
|
| 45 |
+
INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
|
| 46 |
+
WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 47 |
+
INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 48 |
+
INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 49 |
+
INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8003
|
| 50 |
+
INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
|
| 51 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: GET, HEAD
|
| 52 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: GET, HEAD
|
| 53 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: GET, HEAD
|
| 54 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: GET, HEAD
|
| 55 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
|
| 56 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: POST, GET
|
| 58 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 61 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
|
| 62 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
|
| 70 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 71 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 72 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
|
| 73 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
|
| 74 |
+
INFO: Started server process [1316160]
|
| 75 |
+
INFO: Waiting for application startup.
|
| 76 |
+
INFO: Application startup compINFO 0INFO 01-04 13:59:58 [loggers.py:111] Engine 000: Avg prompt throughput: 116.3 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rateIINFO 01-04 14:00:07 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:00:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.6 tokens/s, Running: 1 reqs, IINFOININFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 2IINFINFO 01-04 14:00:18 [loggers.py:111] Engine 000: Avg prompt throughput: 61.2 tokens/s, Avg generation throughput: 80.6 tokens/s, Running: 2 reqs,INFO INFO 01-04 14:00:23 [loggers.py:111] Engine 000: Avg prompt thINFOINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1"INFO 01INFO INFO 01-04 14:00:28 [loggers.py:111] Engine 000: Avg prompt throughput: 86.4 tokens/INFO: 10.43.30.5:0 - "GET /v1/models HTTP/1.1" 200 OK
|
| 77 |
+
INFO 01-04 14:00:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:00:37 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:00:38 [loggers.py:111] Engine 000: Avg prompt throughput: 82.1 tokens/s, Avg generation throughput: 81.6 tokens/s, Running: 2 reqINFINFO 01-04 14:00:46 [loggers.py:111] Engine 000: Avg prompt througINFO INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 78 |
+
INFO 01-04 14:00:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokeINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/INFO 01-04 IINFO 01-04 14:00:56 [loggers.py:111] Engine 000: Avg prompt throughput: 76.3 tokens/s, Avg generation throughput: 86.0 tokens/s, RunnINFO 01-04 14:00:58 [loggers.py:111] Engine 000: Avg prompt throughput: INFO: INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 79 |
+
INFO 01-04 14:01:07 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:01:08 [loggers.py:111] Engine 000: Avg prompt throughput: 61.2 tokens/s, Avg generation throughput: 51.3 tokens/s, Running: INFO: INFOINFO 01-04 14:01:16 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:01:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 60.6 tokens/s, Running:IINFO: INFO: INFO 01-04 14:01:26 [loggers.py:111] Engine 000: AINFO 01-0INFO 0INFO 01-04 14:01:28 [loggers.py:111] Engine 000: Avg prompt throughput: 62.5 tokens/s, Avg generation throughput: 83.0 tokens/s, Running: 2 reqs, WaitingINFO: 10.46.50.192:0 - "POST /v1/completions INFO 01-04 14:01:36 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 INFO 01-04 14:01:38 [loggers.py:111] Engine 000: Avg prompt throughput: 96.0 tokens/s, Avg generation throughput: 81.8 tokens/s, RunniINFO: 10.45.19INFO 01-04 14:01:46 [loggers.py:111] Engine 000: INFO 01-04 1INFO INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 80 |
+
INFO 01-04 14:01:48 [loggers.py:111] Engine 000: Avg prompt throughput: 78.3 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:01:56 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt throughput: 86.6 toINFO 01-04 14:01:58 [loggers.py:111] Engine 000: Avg prompt throughput: 95.2 tokens/s, Avg generation throughput: 96.9 tokens/s, RINFO 01-INFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt throughput: 73.5 tokINFO 01-04 14:02:08 [loggers.py:111] Engine 000: Avg prompt throughput: 76.5 tokens/s, Avg generation throughput: 111.0 tokens/sINFO 01-04 14:INFO 01-04 14:02:16 [loggers.py:111] Engine 000: Avg prompt throughput: 86.9 tokens/s, Avg generation throughput: 91.5 tokens/sINFO 01-04 14:02INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 20INFO: 10.INFO: 10.46.50.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1"INFOINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput: 85.7 tokens/s, Avg generation throughput: 82.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit ratINFO: 10.INFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg prompt throughput: 66.0 tokens/s, Avg generation throughput: 89.0 tokens/INFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt throughput: 102.7 tokens/s,INFOINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 81 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1INFO 0INFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughput: 145.2 tokens/s, Avg generation throughput: 62.3 tokens/s, Running: 2 reqs, WaitinINFO 01-04 14:02:48 [loggers.py:111] Engine 000: Avg prompININFO: INFO 01-04 14:02:56 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt throughput: 104.7 tokens/s, Avg generation throughput: 98.4 tokens/s, Running: 2 reqs, WaINFOINFO 01-04 14:03:06 [loggers.py:111] Engine 000: Avg prompt throughput: 165.1 tokens/s, Avg generation throughput: 75.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, PrefiINFO 01-04 14:03:07 [loINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 82 |
+
INFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg prompt throughput: 121.6 tokens/s, Avg generation throughput: 106.5 tokens/sINFO 01-04 14:03:17 [loggers.py:111] Engine 000: Avg prompt throughput: 117.8 tokens/sINFO 01-04 14:03:18 [loggers.py:111] Engine 000: Avg prompINFO: 10INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 83 |
+
INFO 01-04 14:03:26 [loggers.py:111] Engine 000: Avg prompt INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 84 |
+
INFO 01-04 14:03:27 [INFO 01-04 14:03:28 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:03:36 [loggers.py:111] Engine 000: Avg prompt throughput: 123.7 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, PreINFO 01-04 14:03:37 [loININFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:03:46 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:03:46 [loggers.py:111] Engine 000: Avg prompt throughput: 149.4 tokens/s, Avg generation throughput: 75.5 tokens/s, Running: 2 reqs, WaINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO 01-04 1INFO 01-04 14:03:56 [loggers.py:111] Engine 000: Avg prompt throughput: 283.3 tokens/s, Avg generation throughput: 44.5 tokens/s, Running: 2 reqs, WaiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 86 |
+
INFO 01-0INFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:04:06 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:04:06 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:04:07 [loggers.py:111] Engine 000: Avg prompt throughput: 114.7 tINFO 01-INFO 01-04 14:04:16 [loggers.py:111] Engine 000: Avg prompt throughput: 166.0 tokens/s, Avg generation throughput: 77.1 tokens/s, RINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 87 |
+
INFO 01-04 14:04INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 88 |
+
INFO: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:04:26 [loggers.py:111] Engine 000: Avg prompt throughput: 180.8 tokens/s, Avg generation throughput: 71.2 tokens/s,INFO 01-04 14:04:27 [logINFO 01-04 14:04:28 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:04:36 [loggers.py:111] Engine 000: Avg prompt throughput: 178.4 tokens/s, Avg generation throughput: 74.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 3.5%
|
| 89 |
+
IINFO 01-04 14:04:38 [loggers.py:111] Engine 000: Avg promptINFO: INFOINFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:04:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.2 tokeINFO 01-04 14:04:47 [loggers.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFOINFO 01-04 14:04:56 [loggers.py:111] Engine 000: Avg prompt throughput: 95.0 tokens/s, Avg generation throughput: 24.4 tokININFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg genINFO 01-04 14:05:06 [loggers.py:111] Engine 000: Avg prompt throughput: 119.1 tokens/s, Avg generation throughput: 78.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%,INFO 01-04 14:05:06 [loggers.INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 90 |
+
INFO 01-04 14:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 217.8 tokens/s, Avg generation throughput: 115.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 3.0%
|
| 91 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 92 |
+
INFO: INFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 93 |
+
INFO 01-04 14:05:26 [loggers.py:111] Engine 000: Avg prompt throughput: 211.6 tokens/s, Avg genINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:05:36 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:05:36 [loggers.py:111] Engine 000: Avg prompt throughput: 153.9 tokens/s, Avg generation throughput: 103.7 tokens/s, Running: 3 reqs, Waiting: 0 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 95 |
+
INFO 01-04 14:05:46 [loggers.py:111] Engine 000: Avg prompt throughput: 191.4 tokens/s, Avg generation throughput: 59.1 tokens/s, Running: 1 reqs, WaitingINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 96 |
+
INFOINFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 254.5 tokens/s, Avg generation throughput: 18.7 INFO 01-04 14:05:56 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:05:57 [loggers.py:11INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 97 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 98 |
+
INFO 01-04 14:06:06 [loggers.py:111] Engine 000: AvINFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 230.6 tokens/s, Avg generationINFO 01-04 14:06:08 [loggers.py:111] Engine 000: Avg promptINFO: INFO 01-04 14:06:16 [loggers.py:111] Engine 000: Avg prompt throughput: 244.9 tokens/s, Avg generation throughput: 22.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO 01-04 14:06:17 [loggers.py:1INFO 01-04 14:06:26 [loggers.py:111] Engine 000: Avg prompt throughput: 158.9 tokens/s, Avg generation throughput: 74.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2INFO: 10.45.190.192:0 - "POSTINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 99 |
+
INFO 01-04 14:06:36 [loggers.py:111] Engine 000: Avg prompt throughput: 223.0 tokens/s, Avg generation throughput: 120.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO: 10.46.17.192:0 - "POST /vINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 100 |
+
INFO 01-04 14:06:46 [loggers.py:111] Engine 000: Avg prompt throughput: 254.9 tokens/s, Avg generation throughput: 128.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 10.4%
|
| 101 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 102 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO 01-04 1INFO 01-04 14:06:56 [loggers.py:111] Engine 000: Avg prompt throughput: 436.6 tokens/s, Avg generation throughput: 43.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:06:57 [loggers.py:111] EINFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:07:06 [loggers.py:111] EnginINFO 01-04 14:07:06 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:07:07 [loggers.py:111] EINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 103 |
+
INFININFO 01-04 14:07:16 [loggers.py:111] Engine 000: Avg prompt throughput: 205.2 tokens/s, Avg generation throughput: 42.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:07:17 [loggers.py:111] EnINFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:07:26 [loggers.py:111] Engine 000: Avg prompt throughput: 238.8 tokens/s, Avg generation throuINFO 01-04 14:07:27 [loggers.py:111] Engine 000: Avg prompt throughput: 148.8 tokens/s, Avg generation thrINFO INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 104 |
+
INFO 01-04 14:07:36 [loggers.py:111] Engine 000: Avg prompt throughput: 234.1 tokens/s, Avg generation IINFO 01-04 14:07:37 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200INFOINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO 0INFO 01-04 14:07:46 [loggers.py:111] Engine 000: Avg prompt throughput: 269.4 tokens/s, Avg generation throughput: 52.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:07:47 [loggers.py:111] EngiINFO 0INFO: INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 106 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 107 |
+
INFO 01-04 14:07:56 [loggers.py:111]INFO 01-04 14:07:56 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:07:57 [loggers.py:111] EnINFO 01-04 14:07:58 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 1INFO 01-04 14:08:06 [loggers.py:111] Engine 000: Avg prompt throughput: 286.0 tokens/s, Avg generaINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg prompt throughput: 194.5 tokens/s, Avg generation throuINFO: 10.INFO: 10.46.50.192:0 - "POST /v1/completINFO: INFO 01-04 14INFO 01-04 14:08:16 [loggers.py:111] Engine 000: Avg prompt throughput: 291.9 tokens/s, Avg generINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 108 |
+
INFO 01-04 14:08:17 [loggers.py:111] Engine INFO: 10.4INFO: 10.45.190.192:0 - "POST /v1/completions HTINFO 01-04 14:0INFO 01-04 14:08:26 [loggers.py:111] Engine 000: Avg prompt throughput: 276.7 tokens/s, Avg generation throughput: 65.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 232.9 tokenINFO 01-04 14:08:36 [loggers.py:111] Engine 000: Avg prompt throughput: 389.1 tokens/s, Avg generation throughput: 25.7 tokens/s, RunniINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 0INFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 343.1 tokeINFO 01-04 14:08:46 [loggers.py:111] Engine 000: Avg prompt throughput: 205.5 tokens/s, Avg generation throughput: 45.2 tokens/s, RuINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 109 |
+
INFO: 10.46.17.1INFO 01-04 14:08:56 [loggers.py:111] Engine 000: Avg prompt throughput: 254.5 tokens/s, Avg generation throughput: 76.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 17.9%
|
| 110 |
+
INFO: 10.45.190.192:0 - "POST /v1/completiINFO: 10.43.30.4:INFO 01-04 14:09:06 [loggers.py:111] Engine 000: Avg prompt throughput: 307.6 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.8%
|
| 111 |
+
INFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg prompt throughput: 352.6 tokens/s, Avg generation throughput: 91.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 17.1%
|
| 112 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO 01-04 14:09:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 135.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 17.1%
|
| 114 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 115 |
+
INFO: 10.46.50.192:0 - "POST /v1/compINFO 01-04 14:09:33 [loggINFO 01-04 14:09:36 [loggers.py:111] Engine 000: Avg prompt throughput: 418.5 tokens/s, Avg generation throughput: 57.7 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:09:36 [loggers.py:111]INFO: 10.46.17.192:0 -INFO: 10.43.30.4:0 - "POST /v1/complINFO 01-04 14:09:43 [logINFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 281.4 tokens/s, Avg generation throughput: 43.8 tokens/s, Running: 1 reqs, Waiting: 0 rINFO 01-04 14:09:46 [loggers.py:111INFO: 10.43.30.4:0 - INFO 01-04 14:09:56 [loggers.py:111] Engine 000: Avg prompt throughput: 337.7 tokens/s, Avg generation throughput: 81.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 19.4%
|
| 116 |
+
INFO: 10.43.30.4:0 - "POST /v1/comINFO 01-04 14:10:03 [loggeINFO 01-04 14:10:06 [loggers.py:111] Engine 000: Avg prompt throughput: 369.8 tokens/s, Avg generation throughput: 96.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.7%
|
| 117 |
+
INFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:10:13 [loggerINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 118 |
+
INFO 01-04 14:10:16 [loggers.py:111] Engine 000: Avg prompt throughput: 674.9 tokens/s, Avg generation throughput: 99.4 tokenINFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 264.6 tokens/s, Avg generation throughput: 127.9 tokens/s, Running: 3 reqs, Waiting: 0INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 INFINFO 01-04 14:10:26 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 205.2 tokens/s, Avg generation throughput: 116.9 tokens/s, Running: 3 reqs, WaitiINFO 01-04 14:10:36 [loggers.py:111] Engine 000: Avg prompt througINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 119 |
+
INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 327.7 tokens/sINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 120 |
+
INFO 01-04 14:10:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 41.8 tokens/s, RunniINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 121 |
+
INFO 01-04 14:10:53 INFO 01-04 14:10:56 [loggers.py:111] Engine 000: Avg prompt throughput: 339.0 tokens/s, Avg generation throughput: 3.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 122 |
+
INFO 01-04 14:11:03 [lINFO 01-04 14:11:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geINFO 01-04 14:11:16 [loggers.py:111] Engine 000: Avg prompt throughput: 479.2 tokens/s, Avg generation throughput: 12.6 tokenINFO 01-04 14:11:16 [loggers.py:111] Engine 000: Avg prompt throughput: 359.1 tokens/s, Avg geINFO 01-04 14:11:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, PrefINFO 01-04 14:11:26 [loggINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 123 |
+
INFO 01-04 14:11:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:11:43 [loggers.py:111] Engine 000: Avg prompt throughput: 357.3 tokens/s, Avg generation throughput: 3INFO 01-04 14:11:46 [loggers.py:111] Engine 000: Avg prompt throughput: 352.1 tokens/s, Avg generatioINFO 01-04 14:11:53 [loggeINFO 01-04 14:11:56 [loggers.py:111] Engine 000: Avg prompt throughput: 277.9 tokens/s, Avg generation throughput: 14.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, INFO: 10.46.50.192:0 - "PINFO: 10.46.50.192:0 - "POST /v1/INFO 01-04 14:12:03 [loggers.INFO 01-04 14:12:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%,INFO 01-04 14:12:13 [loggers.pINFO 01-04 14:12:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 124 |
+
INFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:12:27 [loggers.py:111] Engine 000: Avg prompt throughput: 313.4 tokens/s, Avg generation INFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.0 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 563.3 tokens/s, Avg generation throughput:INFO 01-04 1INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 284.4 tokens/s, Avg gINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 125 |
+
INFO 01-04 14:12:56 [loggers.py:111] Engine 000: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 126 |
+
INFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:13:06 [loggers.py:111] Engine 000: Avg prompt throughput: 242.2 tokens/s, Avg generation throughput:INFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO 01-04 14:13:16 [loggers.py:111] Engine 000: Avg prompt throughput: 273.4 tokens/s, Avg generation throughINFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 433.9 tokens/s, Avg generation througINFO 01-04 14:13:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:13:27 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 127 |
+
INFO 01-04 14:13:36 [loggers.py:111] Engine 000: Avg prompt throughput: 595.4 tokens/s, Avg generation thrINFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 299.7 tokens/s, Avg generation throughputINFO 01-04 14:13:46 [loggers.py:111] Engine 000: Avg prompt throughput: 281.5 tokens/s, Avg generation throughput: 154.6 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 10.1%
|
| 128 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 129 |
+
INFO: 10.46.17.192:0 - "POST /v1/coINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 130 |
+
INFO: 10.46.17.192:0 INFO 01-04 14:13:56 [loggers.py:111] EngiINFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 298.1 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 5.1%
|
| 131 |
+
INFO: 10.43.30.3:0 - INFO 01-04 14:14:06 [loggers.py:111] EnINFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 379.9 tokens/s, Avg generation throughput: 12.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO: 10.46.17.192:0 - "POST /v1/coINFO: 10.43.30.4:0 - "PINFO 01-04 14:14:16 [loggers.py:111] INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 369.1 tokens/s, Avg generation throughput: 51.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:14:26 [loggers.py:111] EnINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 132 |
+
INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 395.9 tokens/s, Avg generation throughput: 6INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 133 |
+
INFO: 10.45.190.192:0 - "POST /v1/INFO: 10.46.17.192:0 - "PINFO 01-04 14:14:36 [loggers.py:111] INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 158.3 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usagINFO: 10.46.17.192:0 - "POST /v1/INFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 600.4 tokens/s, Avg generation throughput: 76.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 5.2%
|
| 134 |
+
INFO: 10.43.30.5:0 - "POSINFO: 10.43.30.5:0 - "POST /v1/INFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 326.7 tokens/s, Avg generation throughput: 95.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 5.1%
|
| 135 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 136 |
+
INFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 268.5 tokens/s, Avg generation throughput: 101.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 5.9%
|
| 137 |
+
INFO: 10.43.30.3:0 - "INFO: 10.46.50.192:0 - "POST /v1/cINFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 853.6 tokens/s, Avg generation throughput: 97.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 7.9%
|
| 138 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 139 |
+
INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 627.9 tokens/s, Avg generation throughput: 121.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 7.6%
|
| 140 |
+
INFO: 10.46.17.192:0 - INFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 339.4 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 10.6%
|
| 141 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 142 |
+
INFO 01-04 14:15:46 [loggers.py:111] INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 336.0 tokens/s, Avg generation throughput: 105.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usagINFO: 10.43.30.3:0 - "POST /v1/coINFO: 10.43.30.4:0 - "PINFO 01-04 14:15:56 [loggers.py:111] INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 128.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:16:06 [loggers.py:11INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 435.4 tokens/s, Avg generation throughput: 115.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 143 |
+
INFO 01-04 14:16:16 [loggers.py:111] Engine 000: Avg prompt throughput: 528.1 tokens/s, Avg geneINFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 574.4 tokens/s, Avg generation throughput: 90.7 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 144 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 145 |
+
INFO: 10.46.17.192:0 - "INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 146 |
+
INFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 563.7 tokens/s, Avg generation throughput: 75.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, PrINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 147 |
+
INFO 01-04 14:16:36 [loINFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 296.2 tokens/s, Avg generation throughput: 36.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, PrefixINFO 01-04 14:16:46 [lINFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 461.9 tokens/s, Avg generation throughput: 56.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, PrefiINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 148 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 149 |
+
INFO 01-04 14:16:56 [logINFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 529.9 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache uINFO 01-04 14:16:INFO 01-04 14:17:06 [loINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 150 |
+
INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 368.6 tokens/s, Avg generation throughput: 85.2 tokens/s, INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1126.2 tokens/s,INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 578.2 tokens/s, Avg generation throughput: 62.1 tokens/s, RuINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 151 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 152 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO 01-04 14:17:26 INFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 596.8 tokens/s, Avg generation throughput: 58.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix INFO 01-04 14:17:36 [INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 683.6 tokens/s, Avg generation throughput: 71.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 9.8%
|
| 154 |
+
INFO: 10.43.30.4:0 - "POST /v1/completioINFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 171.9 tokensINFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 953.2 tokens/s, Avg generation throughput: 60.7 tokens/s, RunniINFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 665.4 tokens/s, Avg generation throughput: 59.1 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 155 |
+
INFO 01-04 14:18:06 [loggers.py:111] Engine 000: Avg prompt throughput: 308.4 tokens/s, Avg generation throughput: 104.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 13.2%
|
| 156 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 157 |
+
INFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 329.6 tokens/s, Avg generation throughput: 77.5 tokens/s, Running: 2 reqs, WaiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 158 |
+
ININFO 01-04 14:18:26 [loggers.py:111] Engine 000: Avg prompt throughput: 245.1 tokens/s, Avg generation throughput: 128.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 12.9%
|
| 159 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 160 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 161 |
+
INFO 01-04 14:18:36 [loggers.py:111] Engine 000: Avg prompt throughput: 483.9 tokens/s, Avg generation throughput: 116.6 tokens/s, Running: 2 reqs, WaINFO 01-04 14:18:37 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:18:46 [loggers.py:111] Engine 000: Avg prompt throughput: 513.4 tokens/s, Avg generation throughput: 123.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 13.3%
|
| 162 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 163 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 164 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:18:56 [loggers.py:111] Engine 000: Avg prompt throughput: 393.9 tokens/s, Avg generation throughput: 31.2 tokens/s, Running: 1 reqs,INFO 01-04 14:18:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:19:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs,INFO 01-04 14:19:07 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 165 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 166 |
+
INFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 727.4 tINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 167 |
+
INFO 01-04 14:19:17 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO 01-04 14:19:26 [loggers.py:111] Engine 000: Avg prompt throughput: 322.2 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqsINFO 01-04 14:19:27 [loggers.py:111] Engine 000: Avg prompt throughput: 63INFO 01-04 14:19:36 [loggers.py:111] Engine 000: Avg prompt throughput: 589.9 tokens/s, Avg generation throughput: 67.6 tokens/s, Running: 2 reINFO 01-04 14:19:37 [loggers.py:111] Engine 000: Avg prompt throughput: 437.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 168 |
+
INFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 287.7 tokens/s, Avg generation throughput: 108.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hitINFO 01-04 14INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 169 |
+
INFO 01-04 14:19:56 [loggers.py:111] Engine 000: Avg prompt throughput: 261.5 tokens/s, Avg generation throughput: 112.4 tokens/s, Running: 2INFO 01-04 14:19:57 [loggers.py:111] Engine 000: Avg prompt throughput: 600.4 tINFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput: 529.4 tokens/s, Avg generation throughput: 113.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hINFO 01-04 14:2INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 170 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
INFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 677.4 tokens/s, Avg generation throughput: 84.8 tokens/s, RunINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 172 |
+
INFO 01-04 14:20:17 INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 173 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 174 |
+
INFO 01-04 14:20:26 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 175 |
+
INFO 01-04 14:20:27 [loggers.py:111] Engine 000: Avg prompt throughput: 599.8 tokens/INFO 01-04 14:20:36 [loggers.py:111] Engine 000: Avg prompt throughput: 761.0 tokens/s, Avg generation throughput: 62.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cacINFO 01-04 14:20:37INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 176 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 177 |
+
INFO 01-04 14:20:46 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:20:47 [loggers.py:111] Engine 000: Avg prompt throughput: 634.7 tokens/s, Avg generation throughput: 54.4 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:20:56 [loggers.py:111] Engine 000: Avg prompt throughput: 266.2 tokens/s, Avg generation throughput: 73.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 12.5%
|
| 178 |
+
INFO: 10.45.190.192:0 - "POST /v1/compleINFO 01-04 14:20:58 [loINFO 01-04 14:21:06 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:21:07 [loggers.py:111] Engine 000: Avg prompt throughput: 625.0 tokens/s, Avg generation throughput: 111.1 tokens/s, Running: 2 reqs, WaitiINFO 01-04 14:21:16 [loggers.py:111] Engine 000: Avg prompt throughput: 549.0 tokens/s, Avg generation throughput: 74.0 tokens/s, RINFO 01-04 14:21:17 [loggers.py:111] Engine 000: Avg prompt throughput: 870.4 tokens/s, INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 179 |
+
INFO 01-04 14:21:26 [loggers.py:111] Engine 000: Avg prompt throughput: 635.4 tokens/s, Avg generation throughput: 101.0 tokens/s, INFO 01-04 14:21:27 [loggers.py:111] Engine 000: Avg prompt throughput: 541.1 tokens/s, AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 180 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 181 |
+
INFO 01-04 14:21:36 [loggers.py:111] Engine 000: Avg prompt throughput: 656.7 tokens/s, Avg generation throughput: 65.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.4%
|
| 182 |
+
INFO: 10.45.190.192:0 - "POST /v1/completINFO 01-04 14:21:3INFOINFO 01-04 14:21:46 [loggers.py:111] Engine 000: Avg prompt throughput: 637.5 tokens/s, Avg generation throughput: 30.7 tokensINFO 01-04 14:21:47 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.3:0INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKIINFO 01-04 14:21:56 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:21:57 [loggers.py:111] Engine 000: Avg prompt throughput: 632.1 tokens/s, Avg generation throughput: 70.0 tokens/s, RunninINFO 01-04 14:21:58 INFINFO 01-04 14:22:06 [loggers.py:111] Engine 000: Avg prompt throughput: 665.3 tokens/s, Avg generation throughput: 50.8 tokeINFO 01-04 14:22:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generINFO 01-04 14:22:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, INFO 01-04 14:22:17 [loggers.INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO 01-04 14:22:26 [loggers.py:111] Engine 000: Avg prompt throughput: 589.8 tokens/s, Avg generation throughput: 111.5 tokenINFO 01-04 14:22:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1509.7 tokens/s, Avg gINFO 01-04 14:22:36 [loggers.py:111] Engine 000: Avg prompt throughput: 672.7 tokens/s, Avg generation throughput: 96.9 tokensINFO 01-04 14:22:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg genINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 184 |
+
INFO 01-04 14:22:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 102.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, PrINFO: 10.43.30.3:0 - "PINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 185 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 186 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 187 |
+
INFO 01-04 14:22:56 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:22:57 [loggers.py:111] Engine 000: Avg prompt throughput: 810.9 tokens/s, Avg generation throughput: 65.9 tokens/s, Running: 2 reqs, WaitingINFO 01-04 14:23:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunnINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 188 |
+
INFO 01-04 14:23:07INFO 01-04 14:23:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1094.5 tokens/s, Avg generation throughput: 44.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 13.9%
|
| 189 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 190 |
+
INFO 01-04 14:23:36 [loggers.py:111] Engine 000: Avg prompt throughput: 699.4 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 13.7%
|
| 191 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 192 |
+
INFO 01-04 14:23:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 57.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 13.7%
|
| 193 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 194 |
+
INFO 01-04 14:23:56 [loggers.py:111] Engine 000: Avg prompt throughput: 715.5 tokens/s, Avg generation throughput: 41.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 13.4%
|
| 195 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 196 |
+
INFO 01-04 14:24:06 [loggers.py:111] Engine 000: Avg prompt throughput: 726.9 tokens/s, Avg generation throughput: 72.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 13.2%
|
| 197 |
+
INFO 01-04 14:24:16 [loggers.py:111] Engine 000: Avg prompt throughput: 883.9 tokens/s, Avg generation throughput: 42.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cache hit rate: 12.9%
|
| 198 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 199 |
+
INFO 01-04 14:24:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 55.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 12.9%
|
| 200 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 201 |
+
INFO 01-04 14:24:36 [loggers.py:111] Engine 000: Avg prompt throughput: 695.2 tokens/s, Avg generation throughput: 23.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.7%
|
| 202 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 203 |
+
INFO 01-04 14:24:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 26.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.7%
|
| 204 |
+
INFO 01-04 14:24:56 [loggers.py:111] Engine 000: Avg prompt throughput: 445.6 tokens/s, Avg generation throughput: 25.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 12.6%
|
| 205 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 206 |
+
INFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.6%
|
| 207 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 208 |
+
INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 728.3 tokens/s, Avg generation throughput: 18.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.4%
|
| 209 |
+
INFO 01-04 14:25:26 [loggers.py:111] Engine 000: Avg prompt throughput: 517.9 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 12.2%
|
| 210 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 211 |
+
INFO 01-04 14:25:36 [loggers.py:111] Engine 000: Avg prompt throughput: 602.2 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 12.0%
|
| 212 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 213 |
+
INFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 627.1 tokens/s, Avg generation throughput: 31.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 11.9%
|
| 214 |
+
INFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 11.9%
|
| 215 |
+
INFO 01-04 14:26:06 [loggers.py:111] Engine 000: Avg prompt throughput: 705.4 tokens/s, Avg generation throughput: 48.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hit rate: 11.7%
|
| 216 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 217 |
+
INFO 01-04 14:26:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 71.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 11.7%
|
| 218 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 219 |
+
INFO 01-04 14:26:26 [loggers.py:111] Engine 000: Avg prompt throughput: 538.3 tokens/s, Avg generation throughput: 54.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 11.6%
|
| 220 |
+
INFO 01-04 14:26:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 11.6%
|
| 221 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 222 |
+
INFO 01-04 14:26:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
|
| 223 |
+
INFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 574.7 tokens/s, Avg generation throughput: 38.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 11.4%
|
| 224 |
+
INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 724.1 tokens/s, Avg generation throughput: 68.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 11.2%
|
| 225 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 226 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 227 |
+
INFO 01-04 14:27:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.2%
|
| 228 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 229 |
+
INFO 01-04 14:27:26 [loggers.py:111] Engine 000: Avg prompt throughput: 800.4 tokens/s, Avg generation throughput: 26.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.0%
|
| 230 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 231 |
+
INFO 01-04 14:27:36 [loggers.py:111] Engine 000: Avg prompt throughput: 598.9 tokens/s, Avg generation throughput: 15.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.9%
|
| 232 |
+
INFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throughput: 769.5 tokens/s, Avg generation throughput: 10.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 11.9%
|
| 233 |
+
INFO 01-04 14:27:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 11.9%
|
| 234 |
+
INFO 01-04 14:28:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.9%
|
| 235 |
+
INFO 01-04 14:28:16 [loggers.py:111] Engine 000: Avg prompt throughput: 219.4 tokens/s, Avg generation throughput: 79.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 11.9%
|
| 236 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 237 |
+
INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 462.0 tokens/s, Avg generation throughput: 54.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 11.8%
|
| 238 |
+
INFO 01-04 14:28:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.8%
|
| 239 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 240 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 241 |
+
INFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.8%
|
| 242 |
+
INFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throughput: 875.8 tokens/s, Avg generation throughput: 10.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 11.6%
|
| 243 |
+
INFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 11.6%
|
| 244 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 245 |
+
INFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
|
| 246 |
+
INFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
|
| 247 |
+
INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 890.9 tokens/s, Avg generation throughput: 12.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 11.4%
|
| 248 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 249 |
+
INFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 34.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.4%
|
| 250 |
+
INFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.4%
|
| 251 |
+
INFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 974.1 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 11.2%
|
| 252 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 253 |
+
INFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 15.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.2%
|
| 254 |
+
INFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.2%
|
| 255 |
+
INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 640.3 tokens/s, Avg generation throughput: 26.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 11.0%
|
| 256 |
+
INFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 11.0%
|
| 257 |
+
INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 422.4 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 10.9%
|
| 258 |
+
INFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 10.9%
|
| 259 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 260 |
+
INFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 84.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 10.9%
|
| 261 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 262 |
+
INFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 11.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.9%
|
| 263 |
+
INFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 807.4 tokens/s, Avg generation throughput: 13.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 11.8%
|
| 264 |
+
INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.8%
|
| 265 |
+
INFO 01-04 14:32:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 11.8%
|
| 266 |
+
INFO 01-04 14:32:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 11.8%
|
| 267 |
+
INFO 01-04 14:32:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 11.8%
|
| 268 |
+
INFO 01-04 14:32:46 [loggers.py:111] Engine 000: Avg prompt throughput: 540.7 tokens/s, Avg generation throughput: 61.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.3%, Prefix cache hit rate: 11.7%
|
| 269 |
+
INFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cache hit rate: 11.7%
|
| 270 |
+
INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.0%, Prefix cache hit rate: 11.7%
|
| 271 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 272 |
+
INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 11.7%
|
| 273 |
+
INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 617.3 tokens/s, Avg generation throughput: 88.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.4%, Prefix cache hit rate: 11.5%
|
| 274 |
+
INFO 01-04 14:33:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.8%, Prefix cache hit rate: 11.5%
|
| 275 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 276 |
+
INFO 01-04 14:33:46 [loggers.py:1INFO 01-04 14:33:48 [loggers.py:111] Engine 000: Avg prompt throughput: 820.8 tokens/s, Avg generation throughput: 20.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:33:56 [loggers.py:111INFO 01-04 14:33:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:34:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:34:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 25.INFO 01-04 14:34:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:34:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0INFO 01-04 14:34:26 [loggers.py:111] Engine 000: Avg prompt throughput: 873.3 tokens/s, Avg generatiINFO 01-04 14:34:48 [loggers.py:111] Engine 000: Avg prompt throughput: 759.6 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 21.7%
|
| 277 |
+
INFO 01-04 14:34:58 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:35:03 [loggers.py:111] Engine 000: Avg prompt throughput: 925.1 tokens/s, Avg generation throughput: 36.8 tokens/s, Running: 1 reqs, Waiting: 0 rINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:35:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 19.3%
|
| 278 |
+
INFO 01-04 14:35:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 19.3%
|
| 279 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 280 |
+
INFO 01-04 14:35:33INFO 01-04 14:35:46 [loggers.py:111] Engine 000: Avg prompt throughput: 898.0 tokens/s, Avg generation throughput: 30.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 16.4%
|
| 281 |
+
INFO 01-04 14:35:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunINFO 01-04 14:36:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1207.6 tokens/s, Avg generation throughput: 26.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 16.0%
|
| 282 |
+
INFO 01-04 14:36:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 16.0%
|
| 283 |
+
INFO: 10.46.50.192:0 - "POSTINFO 01-04 14:36:56 [loggers.py:11INFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokensINFO 01-04 14:37:26 [loggers.py:111] Engine 000: Avg prompt throughput: 615.0 tokens/s, Avg generation throughput: 40.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.1%
|
| 284 |
+
INFO 01-04 14:37:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, RunINFO 01-04 14:38:16 [loggers.py:111] Engine 000: Avg prompt throughput: 521.8 tokens/s, Avg generation throughput: 38.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 15.9%
|
| 285 |
+
INFO 01-04 14:38:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.9%
|
| 286 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 287 |
+
INFO 01-04 14:38:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:38:46 [loggers.py:111] Engine 000: Avg prompt throughput: 471.6 tokens/s, Avg generation throughput: 8.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.9%
|
| 288 |
+
INFO 01-04 14:38:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, RunnINFO 01-04 14:40:36 [loggers.py:111] Engine 000: Avg prompt throughput: 1101.8 tokens/s, Avg generation throughput: 34.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 17.0%
|
| 289 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 290 |
+
INFO 01-04 14:40:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
|
| 291 |
+
INFO 01-04 14:40:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
|
| 292 |
+
INFO 01-04 14:42:16 [loggers.py:111] Engine 000: Avg prompt throughput: 698.6 tokens/s, Avg generation throughput: 32.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 16.8%
|
| 293 |
+
INFO 01-04 14:42:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 16.8%
|
| 294 |
+
INFO 01-04 14:42:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 16.8%
|
| 295 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 296 |
+
INFO 01-04 14:42:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
|
| 297 |
+
INFO 01-04 14:42:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
|
| 298 |
+
INFO 01-04 14:44:26 [loggers.py:111] Engine 000: Avg prompt throughput: 443.6 tokens/s, Avg generation throughput: 23.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 16.7%
|
| 299 |
+
INFO 01-04 14:44:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 16.7%
|
| 300 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 301 |
+
INFO 01-04 14:44:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 23.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0INFO 01-04 14:45:26 [loggers.py:111] Engine 000: Avg prompt throughput: 728.0 tokens/s, Avg generation throughput: 22.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 13.3%
|
| 302 |
+
INFO: 10.46.50.192:0 - "PINFO 01-04 14:46:26 [loggers.py:111] Engine 000: Avg prompt throughput: 355.5 tokens/s, Avg generation throughput: 39.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 16.6%
|
| 303 |
+
INFO 01-04 14:46:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 16.6%
|
| 304 |
+
INFO 01-04 14:46:46 [loggers.py:11INFO 01-04 14:50:26 [loggers.py:111] Engine 000: Avg prompt throughput: 645.3 tokens/s, Avg generation throughput: 45.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 13.2%
|
| 305 |
+
INFO 01-04 14:50:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 13.2%
|
| 306 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 307 |
+
INFO 01-04 14:50:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.2%
|
| 308 |
+
INFO 01-04 14:50:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.2%
|
| 309 |
+
INFO 01-04 15:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 507.4 tokens/s, Avg generation throughput: 36.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 13.0%
|
| 310 |
+
INFO 01-04 15:05:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 13.0%
|
| 311 |
+
INFO 01-04 15:05:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 13.0%
|
| 312 |
+
INFO 01-04 15:05:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 13.0%
|
| 313 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 314 |
+
INFO 01-04 15:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.0%
|
| 315 |
+
INFO 01-04 15:06:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.0%
|
| 316 |
+
n throughput: 47.5 tokensINFO 01-04 15:04:43 [loggers.py:111] Engine 000: Avg prompt throughput: 503.2 tokens/s, Avg generation throughput: 29.2 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 15:08:27 [loggers.py:111] Engine 000: Avg prompt throughput: 530.6 tokens/s, Avg generation throughput: 15.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 12.8%
|
| 317 |
+
INFO 01-04 15:08:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 12.8%
|
| 318 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 319 |
+
INFO 01-04 15:08:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 14.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.8%
|
| 320 |
+
INFO 01-04 15:08:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.8%
|
| 321 |
+
0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
|
| 322 |
+
pt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 19.3%
|
| 323 |
+
INFO 01-04 15:02:28 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 19.3%
|
| 324 |
+
INFO 01-04 15:02:38 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 19.3%
|
| 325 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 326 |
+
INFO 01-04 15:02:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.3%
|
| 327 |
+
INFO 01-04 15:02:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.3%
|
hf_ip/vllm_gpu3.log
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:33 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:36 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:36 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8004, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:43 [config.py:717] This model supports multiple tasks: {'generate', 'score', 'classify', 'embed', 'reward'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:44 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:13:48 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:13:52 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:13:52 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f3dc054a8c0>
|
| 13 |
+
INFO 01-04 13:13:53 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:13:53 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:13:53 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:13:53 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 106.29 seconds
|
| 35 |
+
INFO 01-04 13:15:40 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 106.583423 seconds
|
| 36 |
+
INFO 01-04 13:15:54 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 37 |
+
INFO 01-04 13:15:54 [backends.py:430] Dynamo bytecode transform time: 14.34 s
|
| 38 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
|
| 39 |
+
INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 54.37 s
|
| 40 |
+
INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 68.72 s in total
|
| 41 |
+
INFO 01-04 13:17:53 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 42 |
+
INFO 01-04 13:17:53 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
|
| 43 |
+
INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
|
| 44 |
+
INFO 01-04 13:18:32 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.43 seconds
|
| 45 |
+
INFO 01-04 13:18:32 [core_client.py:439] Core engine process 0 ready.
|
| 46 |
+
WARNING 01-04 13:18:32 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 47 |
+
INFO 01-04 13:18:32 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 48 |
+
INFO 01-04 13:18:32 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 49 |
+
INFO 01-04 13:18:32 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8004
|
| 50 |
+
INFO 01-04 13:18:32 [launcher.py:28] Available routes are:
|
| 51 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
|
| 52 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /docs, Methods: HEAD, GET
|
| 53 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
|
| 54 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
|
| 55 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /health, Methods: GET
|
| 56 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /load, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /ping, Methods: GET, POST
|
| 58 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 61 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /version, Methods: GET
|
| 62 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /pooling, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /score, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /rerank, Methods: POST
|
| 70 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 71 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 72 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /invocations, Methods: POST
|
| 73 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /metrics, Methods: GET
|
| 74 |
+
INFO: Started server process [1316452]
|
| 75 |
+
INFO: Waiting for application startup.
|
| 76 |
+
INFO: Application startup compININFOINFO 01-04 14:00:03 [loggers.py:111] Engine 000: Avg prompt throughput: 120.9 tokens/s, Avg generation throughput: 93.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit ratINININFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 77 |
+
INFO INFO 01-04 14:00:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 80.8 tokens/s, Running: 1 reqs, WaiIINFO 01-04 14:00:19 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:00:20 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:00:23 [loggers.py:111] Engine 000: Avg prompt throughput: 63.2 tokensINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 2INININFO 01-04 14:00:30 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:00:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqsININFO 01-04 14:00:40 [loggers.py:111] Engine 000: Avg prompt throughput: 61.5 tokens/s, Avg generation throughput: 107.7 tokens/s, RuINFO 01-04 14:00:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, WaitIINFO 01-04 14:00:50 [loggers.py:111] Engine 000: Avg prompt throughput: 54.3 tokens/s, Avg generation throughput: 118.6 tokens/s, RuINFO 01-04 14:00:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sININFO:INFO 01-04 14:00:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.6 tokeININFO 01-04 14:01:03 [loggers.py:111] Engine 000: Avg prompt throughput: 81.3 tokens/s, Avg generation throughput: 28.1 tokens/s, Running: 1 reqs, WaitiINFO INFO 01-04 14:01:07 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:01:13 [loggers.py:111] Engine 000: Avg prompt throughput: 100.0 tokens/s, Avg generation throughput: 64.3 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 141.3 tokenINFO 01-04 14:01:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.7 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:01:26 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:01:33 [loggers.py:111] Engine 000: Avg prompt throughput: 89.9 tokens/s, Avg generation throughput: 91.5 tokens/s, Running: 2 reqs, Waiting: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 78 |
+
INFO 01-04 14:01:43 [loggers.py:111] Engine 000: Avg prompt throughput: 81.1 tokens/s, Avg generation throughput: 131.3 tokens/s, Running: 2 reqs, Waiting: 0INFINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 79 |
+
INFO 01-04 14:01:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 88.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO: 10.45.190.192:0 - "POST /v1/completions HTTPINFO 01-04 14:02:03 [loggers.py:111] Engine 000: Avg prompt throughput: 163.9 tokens/s, Avg generation throughput: 93.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 0.7%
|
| 80 |
+
INFO: 1INFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg INFO 01-04 14:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 102.2 tokens/s, Avg generation throughput: 138.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throughput: 103.5 tokens/s, Avg generation throughput: 126INFO 01-04 14:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: INFO INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 81 |
+
INFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 97.1 tokens/s, Avg generation throughput: 95.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:02:43 [loggers.py:111] Engine 000: Avg prompt throughput: 96.5 tokens/s, Avg generation throughput: 107.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 0.5%
|
| 82 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 83 |
+
INFO 01-04 14:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 86.3 tokens/s, Avg generation throughput: 128.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 0.6%
|
| 84 |
+
INFO: 10.45.INFO 01-04 14:02:57 [loggers.py:111] Engine 000: AvINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO 01-04 14:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 81.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:03:07 [loggers.py:111] Engine 000: AvINFO 01-04 14:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 133.9 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPUINFO: 10.46.17.192:0 - "POST /v1/completions HTINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 86 |
+
INFO: INFO 01-04 14:03:20 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:03:23 [loggers.py:111] Engine 000: Avg prompt throughput: 134.6 tokens/s, Avg generation throughput: 80.3 tokens/s, Running: 1 reINFO 01-04 14:03:25 [loINFO 01-04 14:03:27 [loggers.py:111] Engine 000: AvgINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 87 |
+
INFO 01-04 14:03:33 [loggers.py:111] Engine 000: Avg prompt throughput: 129.5 tokens/s, Avg generatiINFO 01-04 14:03:37 [loggers.py:111] Engine 000: Avg prompt throughput: 88.4 tokens/s, Avg generation throughput: 87.1 tokens/sINFO 01-04 14:03:40 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:03:43 [loggers.py:111INFO 01-04 14:03:47 [loggers.py:111] Engine 000: Avg prompt throughput: 120.8 tokens/s, Avg generation throughput: 116.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usageINFO: 10INFO: 10.43.30.3:0 -INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 88 |
+
INFO 01-04 14:03:57 [loggers.py:111] Engine 000: Avg prompt throughput: 159.7 tokens/s, Avg generation throughput: 164.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg prompt throughput: 161.7 tokens/s, Avg generationINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 89 |
+
INFO 01-04 14:04:07 [loggers.py:111] Engine 000: AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 90 |
+
INFO: 1INFO 01-04 14:04:10 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:04:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:04:17 [loggers.py:111] Engine 000: Avg prompt INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 167.1 tokens/s, Avg generation thINFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg prompt throughput: 174.5 tokens/s, Avg generation throughputINFO 01-04 14:04:33 [loggers.py:111] Engine 000: Avg prompt throughput: 144.0 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 0.6%
|
| 91 |
+
INFO: 10.46.17.19INFO 01-04 14:04:37 [loggers.py:111] Engine 0INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 92 |
+
INFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt throughput: 195.8 tokens/s, Avg generation throughput: 52.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.46.17.192:0 - "POST /v1/compleINFO: 10.45.190.192INFO 01-04 14:04:47 [loggers.py:111] Engine INFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throughput: 171.5 tokens/s, Avg generation throughput: 34.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:04:57 [loggers.py:111] Engine 00INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 93 |
+
INFO 01-04 14:05:03 [loggers.py:111] Engine 000: Avg prompt throughput: 183.8 tokens/s, Avg generation throughput: 32.2 tokens/s, Running: 1 reqs, WaitiINFO: 10.46.17.1INFO 01-04 14:05:07 [loggers.py:111] Engine 00INFO 01-04 14:05:13 [loggers.py:111] Engine 000: Avg prompt throughput: 131.3 tokens/s, Avg generation throughput: 61.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO: 10.45.190.192:0 - "POST /v1/completIINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompt throughput: 204.0 tokens/s, Avg generation throughput: 70.4 tokens/s, Running: 2 reqs, WaitinINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 95 |
+
INFO 01-04 14:05:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 2.2%
|
| 96 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 97 |
+
INFO 01-04 14:05:43 [loggers.py:111] Engine 000: Avg prompt throughput: 163.2 tokens/s, Avg generation throughput: 68.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:05:47 [loggers.py:111] EnginINFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt throughput: 166.6 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 2.1%
|
| 98 |
+
INFO: 10.46.50.192:0 INFO 01-04 14:05:57 [loggers.py:111] EngiINFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 340.7 tokens/s, Avg generation throughput: 107.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 6.4%
|
| 99 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 100 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 101 |
+
INFO 01-04 14:06:13 [loggers.py:111] Engine 000: Avg prompt throughput: 152.7 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV INFO: 10.46.17.192:0 - "POST /v1/completionINFO: 10.45.190INFO 01-04 14:06:17 [loggers.py:111] Engine 000:INFO 01-04 14:06:23 [loggers.py:111] Engine 000: Avg prompt throughput: 168.2 tokens/s, Avg generation throughput: 82.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 9.1%
|
| 102 |
+
INFO 01-04 14:06:33 [loggers.py:111] Engine 000: Avg prompt throughput: 186.5 tokens/s, Avg generation throughput: 100.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 8.7%
|
| 103 |
+
INFO: 10.43INFO: 10.46.17.192:0 - "POST /v1/completions INFO 01-04 14:06:43 [loggers.py:111] Engine 000: Avg prompt throughput: 225.2 tokens/s, Avg generation throughput: 96.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 8.3%
|
| 104 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 106 |
+
INFO 01-04 14:06:53 [loggers.py:111] Engine 000: Avg prompt throughput: 212.9 tokens/s, Avg generation throughput: 138.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.43.30.5:0 - "POST /v1/completions HTTINFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughput: 225.8 tokens/s, Avg generation throughput: 101.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 7.6%
|
| 107 |
+
INFO: 10.INFO 01-04 14:07:07 [loggers.py:111] Engine 000: AvINFO 01-04 14:07:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 113.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.46.17.192:0 - "POST /v1/completions HINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 108 |
+
INFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput: 239.1 tokens/s, Avg generation throughput: 114.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 7.3%
|
| 109 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 110 |
+
INFO 01-04 14:07:33 [loggers.py:111] Engine 000: Avg prompt throughput: 454.7 tokens/s, Avg generation throughput: 121.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 6.7%
|
| 111 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 112 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO 01-04 14:07:43 [loggers.py:111] Engine 000: Avg prompt throughput: 242.1 tokens/s, Avg generation throughput: 95.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/INFO 01-04 14:07:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 6.5%
|
| 114 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 115 |
+
INFO 01-04 14:08:03 [loggers.py:111] Engine 000: Avg prompt throughput: 322.9 tokens/s, Avg generation throughput: 92.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 6.1%
|
| 116 |
+
INFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 234.5 tokens/s, Avg generation throughput: 109.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 5.9%
|
| 117 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 118 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 119 |
+
INFO 01-04 14:08:23 [loggers.py:111] Engine 000: Avg prompt throughput: 222.1 tokens/s, Avg generation throughput: 95.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 5.7%
|
| 120 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 121 |
+
INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 371.0 tokens/s, Avg generation throughput: 74.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 6.6%
|
| 122 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HINFO 01-04 14:08INFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 217.7 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache INFO: 10.43INFO 01-04 14:08:53 [loggers.py:111] Engine 000: Avg prompt throughput: 333.3 tokens/s, Avg generation throughput: 115.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 6.1%
|
| 123 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 124 |
+
INFO 01-04 14:09:03 [loggers.py:111] Engine 000: Avg prompt throughput: 207.6 tokens/s, Avg generation throughput: 123.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache hit rate: 6.0%
|
| 125 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 126 |
+
INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 6.0%
|
| 127 |
+
INFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 251.7 tokens/s, Avg generation throughput: 124.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 5.8%
|
| 128 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 129 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 130 |
+
INFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 306.6 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit INFO: 1INFO 01-04 14:09:43 [loggers.py:111] Engine 000: Avg prompt throughput: 357.1 tokens/s, Avg generation throughput: 110.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 7.5%
|
| 131 |
+
INFO:INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 132 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 133 |
+
INFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 309.2 tokens/s, Avg generation throughput: 116.7 tokens/s, Running: 1 reqs, Waiting: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200INFO 01-04 14:10:03 [loggers.py:111] Engine 000: Avg prompt throughput: 379.1 tokens/s, Avg generation throughput: 72.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 9.5%
|
| 134 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 135 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 136 |
+
INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 379.0 tokens/s, Avg geINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 137 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 20INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 138 |
+
INFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 321.2 tokens/s, Avg generation throughput: 64.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 13.5%
|
| 139 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 140 |
+
INFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 475.5 tokens/s, Avg generation throughput: 57.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 12.9%
|
| 141 |
+
INFINFO 01-04 14:10:37 [loggers.py:111] Engine 000: AvINFO: 10INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 292.6 tokens/s, Avg generation throughput: 44.1 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 INFO: 10INFO 01-04 14:10:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 142 |
+
INFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:11:03 [loggers.py:111] Engine 000: Avg prompt throughput: 350.3 tokens/s, Avg generation throughput: 3.3 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:11:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:11:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:11:17 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 143 |
+
INFO 01-04 14:11:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.7 tokens/s, Running: 0 reqs, WaitiINFO 01-04 14:11:27 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:11:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, WaitiINFO 01-04 14:11:37 [loggers.py:111] INFO 01-04 14:11:39 [loggers.pINFO 01-04 14:11:53 [loggers.py:111] Engine 000: Avg prompt throughput: 257.2 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 12.7%
|
| 144 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 145 |
+
INFO 01-04 14:12:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.7%
|
| 146 |
+
INFO 01-04 14:12:13 [loggers.py:111] Engine 000: Avg prompt throughput: 209.4 tokens/s, Avg generation throughput: 1.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 12.5%
|
| 147 |
+
INFO 01-04 14:12:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 12.5%
|
| 148 |
+
INFO 01-04 14:12:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 12.5%
|
| 149 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 150 |
+
INFO 01-04 14:12:43 [loggers.py:111] Engine 000: Avg prompt throughput: 725.6 tokens/s, Avg generation throughput: 86.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.3%
|
| 151 |
+
INFO 01-04 14:12:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 12.3%
|
| 152 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO 01-04 14:13:03 [loggers.py:111] Engine 000: Avg prompt throughput: 232.7 tokens/s, Avg generation throughput: 85.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.1%
|
| 154 |
+
INFO 01-04 14:13:13 [loggers.py:111] Engine 000: Avg prompt throughput: 172.1 tokens/s, Avg generation throughput: 120.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 11.9%
|
| 155 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 156 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 157 |
+
INFO 01-04 14:13:23 [loggers.py:111] Engine 000: Avg prompt throughput: 288.1 tokens/s, Avg generation throughput: 124.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 11.7%
|
| 158 |
+
INFO: 10.46.17.192:0 - "POST /v1/completionINFO 01-04 14:13:25INFO 01-04 14:13:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 54.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cacheINFO: 10.46.1INFO 01-04 14:13:43 [loggers.py:111] Engine 000: Avg prompt throughput: 393.7 tokens/s, Avg generation throughput: 73.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 11.3%
|
| 159 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 160 |
+
INFO 01-04 14:13:53 [loggers.py:111] Engine 000: Avg prompt throughput: 376.6 tokens/s, Avg generation throughput: 114.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 12.2%
|
| 161 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 162 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 163 |
+
INFO 01-04 14:14:03 [loggers.py:111] Engine 000: Avg prompt throughput: 306.6 tokens/s, Avg generation throughput: 65.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3INFO: 10.43.30.3:0 - "POST /INFO 01-04 14:14:13 [loggers.py:111] Engine 000: Avg prompt throughput: 914.3 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 11.2%
|
| 164 |
+
INFO: 10.46.17.192:0 - "POST /v1/compINFO 01-04 14:14:15 [loggINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 165 |
+
INFO 01-04 14:14:23 [loggers.py:111] Engine 000: Avg prompt throughput: 387.3 tokens/s, Avg generation throughput: 110.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:14:26 [loggers.py:111] Engine 000: Avg prompt throughput: 712.5 tokens/s, Avg generation tINFO 01-04 14:14:33 [loggers.py:111] Engine 000: Avg prompt throughput: 323.5 tokens/s, Avg generation throughput: 58.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache uINFO 01-04 14:14:36 [loggers.py:111] EngiINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 166 |
+
INFO 01-04 14:14:43 [loggers.py:111] Engine 000: Avg prompt throughput: 299.0 tokens/s, Avg generation throughput: 92.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:14:46 [loggers.py:111] Engine 000: Avg prompt throughput: 402.2 tokens/s, Avg generation throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 167 |
+
INFO 01-04 14:14:53 [loggers.py:111] Engine 000: Avg prompt throughput: 226.2 tokens/s, Avg generation througINFO: 10.45.1INFO 01-04 14:14:55 [loggers.py:111] Engine 000: AINFO 01-04 14:14:56 [loggers.py:111] EngineINFO 01-04 14:15:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/sINFO 01-04 14:15:05 [loggers.py:111] Engine 000: INFO 01-04 14:15:06 [loggers.py:111] EnginINFO 01-04 14:15:13 [loggers.py:111] Engine 000: Avg prompt throughput: 752.4 tokens/s, Avg generation throughput: 75.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 12.6%
|
| 168 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 169 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 170 |
+
INFO 01-04 14:15:23 [loggers.py:111] Engine 000: Avg prompt throughput: 477.7 tokens/s, Avg generation throughput: 89.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO: 10.43.30.4:0INFO: 10.43.30.5INFO: 10.43.30.5:0INFO 01-04 14:15:26 [loggers.py:111] EnginINFO 01-04 14:15:33 [loggers.py:111] Engine 000: Avg prompt throughput: 428.7 tokens/s, Avg generation throughput: 65.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix caINFO 01-04 14:15:35 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
INFO 01-04 14:15:43 [loggers.py:111] Engine 000: Avg prompt throughput: 466.7 tokens/s, Avg generation throughput: 91.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:15:46 [loggers.py:111] EngineINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 172 |
+
INFO 01-04 14:15:53 [loggers.py:111] Engine 000: Avg prompt throughput: 354.8 tokens/s, Avg generation throughput: 102.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 173 |
+
INFO 01-04 14:15:56 [loggers.py:111] Engine 000: Avg prompt throughput: 358.2 tokens/s, Avg generation throughINFO 01-04 14:16:03 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:16:05 [loggers.py:111] Engine 000INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 174 |
+
INFO 01-04 14:16:06 [loggers.py:111] Engine INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 175 |
+
INFO 01-04 14:16:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:16:16 [loggers.py:111] Engine INFO 01-04 14:16:23 [loggers.py:111] Engine 000: Avg prompt throughput: 384.0 tokens/s, Avg generation throughput: 39.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, PrefINFO 01-04 14:16:25 [loggINFO 01-04 14:16:33 [loggers.py:111] Engine 000: Avg prompt throughput: 905.5 tokens/s, Avg generation throughput: 82.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 15.6%
|
| 176 |
+
INFO: 10.46.17.192:0 - "POST /v1/compINFO 01-04 14:16:35 [loggINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 177 |
+
INFO 01-04 14:16:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 72.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:16:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO 01-04 14:16:49 [loggerINFO 01-04 14:16:53 [loggers.py:111] Engine 000: Avg prompt throughput: 352.9 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 178 |
+
INFO 01-04 14:16:56 [loggers.py:111] Engine INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 179 |
+
INFO 01-04 14:17:03 [loggers.py:111] Engine 000: Avg prompt throughput: 176.1 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 180 |
+
INFO 01-04 14:17:06 [loggers.py:111] Engine INFO 01-04 14:17:13 [loggers.py:111] Engine 000: Avg prompt throughput: 378.8 tokens/s, Avg generation throughput: 52.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:17:16 [loggers.py:111] Engine INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 181 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 182 |
+
INFO 01-04 14:17:23 [loggers.py:111] Engine 000: Avg prompt throughput: 384.4 tokens/s, Avg generation throINFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 437.5 tokens/s, Avg generation throughpuINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO 01-04 14:17:33 [loggers.py:111] Engine 000: Avg prompt throughput: 390.2 tokens/s, Avg generation throughput: 50.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:17:36 [loggers.py:111] EnginINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 184 |
+
INFO 01-04 14:17:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 40.9 tokens/s, RunnINFO 01-04 14:17:45 [loggers.py:111] Engine 00INFO 01-04 14:17:46 [loggers.py:111] EnINFO 01-04 14:17:53 [loggers.py:111] Engine 000: Avg prompt throughput: 1134.6 tokens/s, Avg generation throughput: 39.0 tokens/s, RunINFO 01-04 14:17:55 [loggers.py:111] Engine 00INFO 01-04 14:17:56 [loggers.py:111] EngINFO: 10.46.17.192:0 - "POST /v1INFO 01-04 14:17:59 [loggers.pINFO 01-04 14:18:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 71.3 tokens/s, RunINFO 01-04 14:18:05 [loggers.py:111] Engine 000:INFO 01-04 14:18:06 [loggers.py:111] EINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 185 |
+
INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:18:15 [loggers.py:111] Engine 000: Avg prompt throughput: 470.8 tokens/s, Avg generation throughput:INFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 397.0 tokens/s, Avg generationINFO 01-04 14:18:23 [loggers.py:111] Engine 000: Avg prompt throughput: 497.0 tokens/s, Avg generation throughput: 21INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 186 |
+
INFO: 10.43.30.3:0 - "POST /v1/comINFO: 10.46.17.192:0 - "POST /v1/coINFO 01-04 14:18:29 [loggerINFO 01-04 14:18:33 [loggers.py:111] Engine 000: Avg prompt throughput: 758.9 tokens/s, Avg generation throughput: 31.3 tokens/s, RunninINFO 01-04 14:18:35 [loggers.py:111] Engine 000: Avg prompt throughput: 421.0 tokenINFO 01-04 14:18:43 [loggers.py:111] Engine 000: Avg prompt throughput: 251.6 tokens/s, Avg generation throughput: 78.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cacINFO: 10.46.50.INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 187 |
+
INFO 01-04 14:18:53 [loggers.py:111] Engine 000: Avg prompt throughput: 613.8 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cacINFO 01-04 14:18:55INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 188 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 189 |
+
INFO 01-04 14:19:03 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 190 |
+
INFO 01-04 14:19:05 [loggers.py:111] Engine 000: Avg prompt throughput: 686.9 tokens/INFO: 10.46.17.192:0 - "POSTINFO 01-04 14:19:09 [loggers.py:11INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 191 |
+
INFOINFO 01-04 14:19:15 [loggers.py:111] Engine 000: Avg prompt throughput: 520.1 tokens/s, Avg generation througINFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 449.5 tokens/s, Avg generation thrINFO 01-04 14:19:23 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:19:25 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 192 |
+
INFO 01-04 14:19:26 [loggers.py:111] EnINFO 01-04 14:19:33 [loggers.py:111] Engine 000: Avg prompt throughput: 457.7 tokens/s, Avg generation throughput: 69.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cacheINFO 01-04 14:19:35 [loggers.py:111] Engine INFO 01INFO 01-04 14:19:39 [loggers.py:INFO 01-04 14:19:43 [loggers.py:111] Engine 000: Avg prompt throughput: 290.5 tokens/s, Avg generation throughput: 54.8 tokens/s, RunninINFO 01-04 14:19:45 [loggers.py:111] EnginINFO 01-04 14:19:46 [loggers.py:111] EngiINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 193 |
+
INFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:19:55 [loggers.py:111] Engine 000: Avg prompt throughput: 567.7 tokens/s, Avg generation througINFO 01-04 14:19:56 [loggers.py:111] EINFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 757.3 tokens/s, Avg generation throughput: 73.4 tokens/s, Running:INFO 01-04 14:20:05 [loggers.py:111] Engine 000: Avg prompt throughput: 696.2 tokens/s, Avg generation throughINFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:20:09 [loggers.pINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 194 |
+
INFO 01-0INFO 01-04 14:20:15 [loggers.py:111] Engine 000: Avg prompt throughput: 399.1 tokens/s, Avg generation througINFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 580.1 tokens/s, Avg generatioINFO 01-04 14:20:23 [loggers.py:111] Engine 000: Avg prompt throughput: 458INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 195 |
+
INFO 01-04 14:20:25 [loggers.py:111] EngiINFO 01-04 14:20:26 [loggers.py:111] INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 196 |
+
INFO 01-04 14:20:33 [loggers.py:111] Engine 000: Avg prompt throughput: 560.8 tokens/s, Avg generation throughput: 56.7 tokens/s, Running: 1INFO 01-04 14:20:35 [loggers.py:111] EngineINFO: 10.46.50.192:0 - "POST /v1INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 197 |
+
INFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throughput: 7INFO 01-04 14:20:45 [loggers.py:111] Engine 000: Avg prompt throughput: 547.0 tokens/s, Avg generation throughput: 30.1 tokens/s, Running: 1 reqs,INFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 643.3 tokens/s, Avg generation throughput: 54.1 tokens/s, Running: INFO 01-04 14:20:55 [loggers.py:111] Engine 000: Avg prompt throughput: 680.4 toINFO 01-04 14:21:03 [loggers.py:111] Engine 000: Avg prompt throughput: 599.1 tokens/s, Avg generation throughput: 104.5 tokens/s, Running:INFO 01-04 14:21:05 [loggers.py:111] Engine 000: Avg prompt throughput: 608.1 tokINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 198 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 199 |
+
INFO 01-04 14:21:13 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO 01-04 14:21:15 [loggers.py:111] Engine 000: Avg prompt throughput: 862.1 tokens/s, Avg generation throughput: 60.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:21:16 [loggers.py:111] EnginINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:21:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:21:25 [loggers.py:111] Engine 000: Avg prompt throughput: 485.5 tokens/s, Avg generation throughINFO 01-04 14:21:26 [loggers.py:1INFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg prompt throughput: 617.INFO 01-04 14:21:35 [loggers.py:111] Engine 000: Avg prompt throughput: 426.4 tokens/s, Avg generation throughINFO 01-04 14:21:36 [loggers.py:1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 200 |
+
INFO 01-04INFO 01-04 14:21:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:21:46 [loggers.py:111INFO 01-04 14:21:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 201 |
+
INFO 01-04 14:21:55 [loggers.py:111] Engine 000: Avg prompt throughput: 1617.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 202 |
+
INFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt throughput: 555.2 tokens/s, Avg generation throughput: 54.1 tokens/s, Running: 1INFO 01-04 14:22:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokINFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 731.4 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cacheINFO 01-04 14:22:INFO 01-04 14:22:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 16.0%
|
| 203 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 204 |
+
INFO 01-04 14:22:33 [loggers.py:111] Engine 000: Avg prompt throughput: 604.4 tokens/s, Avg generation throughput: 101.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 17.1%
|
| 205 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTINFO 01-04 14:2INFO 01-04 14:22:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hINFO 01-04 14:2INFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg prompt throughput: 510.3 tokens/s, Avg generation throughput: 69.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 16.9%
|
| 206 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rINFO 01-04 INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 207 |
+
INFO 01-04 14:23:13 [loggers.py:111] Engine 000: Avg prompt throughput: 341.0 tokens/s, Avg generation throughput: 52.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit ratINFO: INFO 01-04 14:23:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1349.9 tokens/s, Avg generation throughput: 83.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.1%, Prefix cache hit rate: 16.9%
|
| 208 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 209 |
+
INFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 108.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 16.9%
|
| 210 |
+
INFO: 10.46.17.192:0 - "POST INFO 01-04 14:23:36 [loggers.py:1INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 877.1 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hitINFO 01-04 14:23:45 [loggers.py:111] Engine 00INFO 01-04 14:23:46 [loggers.py:1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 211 |
+
INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 697INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 212 |
+
INFO 01-04 14:23:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 toINFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 312.1 tokens/s, Avg generation throughput: 62.8 tokens/s, Running: INFO 01-04 14:24:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1436.9 tINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 213 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 214 |
+
INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:24:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughpuINFO 01-04 14:24:16 [loggers.pyINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 215 |
+
INFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1238.8 tokens/s, Avg generation throughput: 40.0 tokens/s, Running: 1 INFO: 10.43.30.5:0 - "POST /v1/completiINFO 01-04 14:24:26 [loggers.py:111INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.INFO: 10.46.50.192:0 - "POST INFO 01-04 14:24:43 [loggers.py:111] Engine 000: Avg prompt throughput: 714.5 tokens/s, Avg generation throughput: 80.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hINFO 01-04 14:24:45 [loggers.py:111] Engine 000: Avg prompt throughput: 760.1 tINFO 01-04 14:24:53 [loggers.py:111] Engine 000: Avg prompt throughput: 584.7 tokens/s, Avg generation throughput: 84.8 tokens/s, Running: 2INFO 01-04 14:24:55 [loggers.py:111] Engine 000: Avg prompt throughput: 736.1 tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 216 |
+
INFO 01-04 14:25:03 [loggers.py:111] Engine 000: Avg prompt throughput: 366.1 tokens/s, Avg generation throughput: 93.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2INFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 217 |
+
INFO 01-04 14:25:13 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:25:15 [loggers.py:111] Engine 000: INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 681.3 tokens/s, Avg geneINFO 01-04 14:25:23 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:25:25 [loggers.py:111] Engine 000: Avg prompt throughput: 349.5 tokens/s, Avg generation throughput: 4INFO 01-04 14:25:26 [loggers.py:INFO 01-04 14:25:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:25:35 [loggers.py:111] Engine 000: Avg prompt throughput: 778.8 tokens/s, Avg generation throughput: 8INFO 01-04 14:25:36 [loggers.py:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 218 |
+
INFO INFO 01-04 14:25:45 [loggers.py:111] Engine 000: Avg prompt throughput: 786.8 tokens/s, Avg generation throughput: 8INFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 219 |
+
INFO: 10.45.190.192:0 - "POST /v1/completiINFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 600.3 tokens/s, Avg generaINFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg prompt throughput: 1210.4 tokens/s, Avg generation throughput: 56.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.2%, Prefix cache hit INFO 01-04 1INFO 01-04 14:26:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 83.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit INFO 01-04 1INFO 01-04 14:26:23 [loggers.py:111] Engine 000: Avg prompt throughput: 768.1 tokens/s, Avg generation throughput: 128.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 12.0%, Prefix cacheINFO 01-04 14:26:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 220 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 221 |
+
INFO 01-04 14:26:33 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:26:35 [loggers.py:111] Engine 000: Avg prompt throughput: 731.0 tokens/s, Avg generation throughputINFO 01-04 14:26:36 [loggers.py:111] INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 222 |
+
ININFO 01-04 14:26:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:26:46 [loggers.py:111] EngINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 INFINFO 01-04 14:26:55 [loggers.py:111] Engine 000: Avg prompt throughput: 582.9 tokens/s, Avg generation throughINFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 624.1 tokens/s, Avg generation througINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 20INFO 01-04 14:27:03 [loggers.py:111] Engine 000:INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 587.0 tokens/s, Avg generation throughput: 59.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:27:13 [loggers.py:111] Engine 000: AvINFO: 10.46.50.19INFO 01-04 14:27:15 [loggers.py:111] Engine 0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 223 |
+
INFO 01-04 14:27:16 [loggers.py:111]INFO 01-04 14:27:23 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:27:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, WaiINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 224 |
+
INFO 01-04 14:27:33 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:27:35 [loggers.py:111] Engine 000: Avg prompt throughput: 802.7 tokens/s, Avg generation throughput:INFO 01-04 14:27:36 [loggers.py:11INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 225 |
+
INFO 01-04 14:27:43 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:27:45 [loggers.py:111] Engine 000: AINFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-04 14:27:53 [loggers.py:111] Engine 000: Avg prompt throughput: 829.6 tokens/s, Avg generation throughput: 31.2 tokens/s, RunningINFO 01-04 14:27:55 [loggers.py:111] Engine 00INFO 01-04 14:27:56 [loggers.py:111]INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 226 |
+
INFO 01-04 14:28:03 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:28:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: INFO 01-04 14:28:06 [loggers.py:1INFO 01-04 14:28:13 [loggers.py:111] Engine 000: Avg prompt throughput: 615.0 tokens/s, Avg generation throughput: 14.0 tokens/s, RunningINFO 01-04 14:28:15 [loggers.py:111] Engine 000INFO 01-04 14:28:16 [loggers.py:111INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 227 |
+
INFO 01-04 14:28:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:28:25 [loggers.py:111] Engine 000INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:28:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs,INFO 01-04 14:28:35 [loggers.py:111] Engine INFO 01-04 14:28:36 [loggers.INFO 01-04 14:28:43 [loggers.py:111] Engine 000: Avg prompt throughput: 885.1 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: INFO 01-04 14:28:45 [loggers.py:111] EngineINFO 01-04 14:28:46 [loggers.INFO 01-04 14:28:53 [loggers.py:111] Engine 000: Avg prompt throughput: 635.7 tokens/s, Avg generation throughput: 4.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%,INFO 01-04 14:28:56 [loggers.pINFO 01-04 14:29:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%INFO 01-04 14:29:06 [loggers.pyINFO 01-04 14:29:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, INFO 01-04 14:29:16 [loggers.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 228 |
+
INFO 01-04 14:29:23 [loggers.py:111] Engine 000: Avg prompt throughput: 669.3 tokens/s, Avg generation throughput: 48.5 tINFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 229 |
+
INFO 01-04 14:29:33 [loggers.py:111] Engine 000: Avg INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 230 |
+
INFO 01-04 14:29:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-04 14:29:43 [loggers.py:111] Engine 000: Avg prompt throughput: 700.5 tokens/s, Avg generation throughput: 25.4INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 838.5 tokens/s, Avg generatiINFO 01-04 14:29:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tINFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:30:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 toINFO 01-04 14:30:05 [loggers.py:111] EngiINFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-04 14:30:13 [loggers.py:111] Engine 000: Avg prompt throughput: 614.1 tokens/s, Avg generation throughput: 20.4INFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO 01-04 14:30:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1149.3 tokens/s, Avg generatiINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 231 |
+
INFO 01-04 14:30:33 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:30:43 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 232 |
+
INFO 01-04 14:30:53 [loggers.py:111] Engine 000: Avg INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 875.2 tokens/s, Avg generation throughput: 20.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:31:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokeINFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generINFO 01-04 14:31:13 [loggers.py:111] Engine 000: Avg prompt throughput: 779.9 tokens/s, Avg generation throughput: 29.9 toINFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:31:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tINFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 799.2 tokens/s, Avg generation throughput: 18.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:31:33 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:31:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokINFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:31:53 [loggers.py:111] Engine 000: Avg prompt throughput: 688.5 tokens/s, Avg generation throughput: 34.5 INFO 01-04 14:31:56 [loggINFO 01-04 14:32:05 [loggers.py:111] Engine 000: Avg prompt throughput: 650.2 tokens/s, Avg generation throughput: 41.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, PrefINFO 01-04 14:32:06 [loggINFO 01-04 14:32:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, PrefINFO 01-04 14:32:16 [loggINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 233 |
+
INFO 01-04 14:32:25 [loggers.py:111] Engine 000: Avg prompt throughput: 544.9 tokens/s, Avg generation throughput: 52.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, PINFO 01-04 14:32:26 [loggersINFO 01-04 14:32:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:32:39 [loggers.py:111] Engine 000: AvgINFO 01-04 14:32:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%,INFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 710.3 tokens/s, Avg generation throughput: 33.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.6%
|
| 234 |
+
INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 15.6%
|
| 235 |
+
INFO: 10.45.190.192:0 - "POST /v1/compleINFO 01-04 14:33:49 [loggers.py:111] Engine 000: Avg prompt throughput: 684.6 tokens/s, Avg generation throughput: 9.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7INFO 01-04 14:33:53 [loggers.py:INFO 01-04 14:33:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%INFO 01-04 14:34:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:34:09 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 12.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%INFO 01-04 14:34:13 [loggers.pyINFO 01-04 14:34:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%,INFO 01-04 14:34:23 [loggers.pINFO 01-04 14:34:59 [loggers.py:111] Engine 000: Avg prompt throughput: 779.7 tokens/s, Avg generation throughput: 44.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 17.1%
|
| 236 |
+
INFO 01-04 14:35:09 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 17.1%
|
| 237 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 238 |
+
INFO 01-04 14:35:19 [loggers.py:111] Engine 000:INFO 01-04 14:35:25 [loggers.py:111] Engine 000: Avg prompt throughput: 608.8 tokens/s, Avg generation throughput: 24.4 tokens/s, Running: INFO 01-04 14:35:33 [loggers.py:111] Engine 000: Avg INFO 01-04 14:35:36 [loggers.py:111] Engine 000: Avg prompt throughput: 796.6 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:35:43 [loggers.py:111] Engine 000: Avg INFO 01-04 14:35:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO: 10.46.50.192:0 - "POINFO: 10.43.30.4:0 INFO 01-04 14:35:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 17.3%
|
| 239 |
+
INFO 01-04 14:36:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 17.3%
|
| 240 |
+
INFO: 10INFO 01-04 14:36:43 [loggers.py:111] Engine 000: Avg prompt throughput: 291.8 tokens/s, Avg generation throughput: 32.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 16.0%
|
| 241 |
+
INFO 01-04 14:36:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 16.0%
|
| 242 |
+
INFO 01-04 14:37:03 [loggers.py:111] Engine 000: AvgINFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1216.2 tokens/s, Avg generation throughput: 40.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:37:13 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:37:16 [loggers.py:111] INFO 01-04 14:37:29 [loggers.py:111] Engine 000: Avg prompt throughput: 297.7 tokens/s, Avg generation throughput: 18.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 17.3%
|
| 243 |
+
INFO 01-04 14:37:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 17.3%
|
| 244 |
+
INFO 01-04 14:37:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 17.3%
|
| 245 |
+
INFO 01-04 14:37:59 [loggerINFO 01-04 14:38:16 [loggers.py:111] Engine 000: Avg prompt throughput: 363.3 tokens/s, Avg generation throughput: 28.4 tokens/s, Running: 1 reqs, Waiting: 0 reqINFO 01-04 14:38:23 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:38:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geneINFO 01-04 14:38:36 [loggers.py:111] Engine 000: Avg prompt throughput: 624.1 tokens/s, Avg generation throughput: 34.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%,INFO 01-04 14:38:36 [loggers.pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 246 |
+
INFO 01-04 14:38:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.1 tokINFO 01-04 14:38:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:38:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokINFO 01-04 14:39:26 [loggers.py:111] Engine 000: Avg prompt throughput: 649.9 tokens/s, Avg geneINFO 01-04 14:40:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1023.4 tokens/s, Avg generation throughput: 12.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 16.2%
|
| 247 |
+
INFO 01-04 14:40:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 16.2%
|
| 248 |
+
INFO 01-04 14:41:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 16.2%
|
| 249 |
+
INFO 01-04 14:41:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.INFO 01-04 14:42:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1721.1 tokens/s, Avg generation throughput: 23.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.9%, Prefix cache hit rate: 17.7%
|
| 250 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 251 |
+
INFO 01-04 14:42:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
|
| 252 |
+
INFO 01-04 14:42:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
|
| 253 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 254 |
+
INFO 01-04 14:45:56 [loggers.py:111] Engine 000: Avg prompt throughput: 1917.4 tokens/s, Avg generation throughput: 34.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
|
| 255 |
+
INFO 01-04 14:46:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
|
| 256 |
+
INFO 01-04 14:53:06 [loggers.py:111] Engine 000: Avg prompt throughput: 410.1 tokens/s, Avg generation throughput: 42.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 18.8%
|
| 257 |
+
INFO 01-04 14:53:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 18.8%
|
| 258 |
+
INFO 01-04 14:53:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.8%
|
| 259 |
+
INFO 01-04 14:53:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 18.8%
|
| 260 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 261 |
+
INFO 01-04 14:53:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 11.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
|
| 262 |
+
INFO 01-04 14:53:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
|
| 263 |
+
INFO 01-04 15:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 513.0 tokens/s, Avg generation throughput: 24.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 18.6%
|
| 264 |
+
INFO 01-04 15:06:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 18.6%
|
| 265 |
+
INFO 01-04 15:06:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO 01-04 15:07:55 [loggers.py:111] Engine 000: Avg prompt throughput: 524.6 tokens/s, Avg generation throughput: 38.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 19.4%
|
| 266 |
+
INFO 01-04 15:08:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 19.4%
|
| 267 |
+
INFO 01-04 15:08:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 19.4%
|
| 268 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 269 |
+
INFO 01-04 15:08:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 37.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
|
| 270 |
+
INFO 01-04 15:08:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
|
| 271 |
+
Avg generation throughput: 33.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
|
| 272 |
+
INFO 01-04 15:07:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
|
| 273 |
+
py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 16.7%
|
| 274 |
+
INFO 01-04 15:02:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.7%
|
| 275 |
+
INFO 01-04 15:02:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.7%
|
| 276 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 277 |
+
INFO 01-04 15:03:09 [loggers.py:111] Engine 0INFO 01-04 15:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 553.8 tokens/s, Avg generation throughput: 12.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 12.5%
|
| 278 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 279 |
+
INFO 01-04 15:10:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.5%
|
| 280 |
+
INFO 01-04 15:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.5%
|
hf_ip/vllm_gpu4.log
ADDED
|
@@ -0,0 +1,305 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:36 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:38 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:38 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8005, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:46 [config.py:717] This model supports multiple tasks: {'score', 'reward', 'generate', 'embed', 'classify'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:47 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:13:51 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:13:54 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:13:55 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f17117328f0>
|
| 13 |
+
INFO 01-04 13:13:56 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:13:56 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:13:56 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:13:56 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 104.27 seconds
|
| 35 |
+
INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 104.572266 seconds
|
| 36 |
+
INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 37 |
+
INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.47 s
|
| 38 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
|
| 39 |
+
INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.37 s
|
| 40 |
+
INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.84 s in total
|
| 41 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 42 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
|
| 43 |
+
INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
|
| 44 |
+
INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.77 seconds
|
| 45 |
+
INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
|
| 46 |
+
WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 47 |
+
INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 48 |
+
INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 49 |
+
INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8005
|
| 50 |
+
INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
|
| 51 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: GET, HEAD
|
| 52 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: GET, HEAD
|
| 53 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: GET, HEAD
|
| 54 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: GET, HEAD
|
| 55 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
|
| 56 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: POST, GET
|
| 58 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 61 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
|
| 62 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
|
| 70 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 71 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 72 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
|
| 73 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
|
| 74 |
+
INFO: Started server process [1317292]
|
| 75 |
+
INFO: Waiting for application startup.
|
| 76 |
+
INFO: Application startup compININFO 01-04 14:00:00 [loggers.py:111] Engine 000: Avg prompt throughput: 71.4 tokens/s, Avg generation throughput: 37.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.0IIINFO 01-04 14:00:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 0.0ININFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 77 |
+
INFO 01-04 14:00:20 [loggers.py:111] Engine 000: Avg prompt throughput: 58.3 tokens/s, Avg generation throughput: 48.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.3%, Prefix cache hit rate: 0INFIINFO 01-04 14:00:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.0ININFO 01-04 14:00:40 [loggers.py:111] Engine 000: Avg prompt throughput: 122.0 tokens/s, Avg generation throughput: 96.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 0.6%
|
| 78 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:00:50 [loggers.py:111] Engine 000: Avg prompt throughput: 63.6 tokens/s, Avg generation throughput: 100.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 1.0%
|
| 79 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.INFO 01-INFO 01-04 14:00:57 [loggers.py:111] Engine 000: Avg prompt tINFO: INFO 01-04 14:01:00 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 80.3 tokens/s, Running: 1 rINFO 01-04 14:01:05 [loggers.py:111] Engine 000: Avg prompt throughput: 6INFINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 80 |
+
INFO 01-04 14:01:10 [loggers.py:111] Engine 000: Avg prompt throughput: 66.4 tokens/s, Avg generation throughput: 74.0 tokens/s, Running:INFO 01-04 1INFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt throughIINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 81 |
+
INFO 01-04 14:01:20 [loggers.py:111] Engine 000: Avg prompt throughput: 85.7 tokens/INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1"INFO 01-INFO INFO 01-04 14:01:27 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:01:30 [loggers.py:111] Engine 000: Avg prompt throughput: 104.9 tokens/s, Avg generation throughput: 65.9 tokens/s, Running: 2 reqs, WINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 82 |
+
INFO 0INFO 01-04 14:01:40 [loggers.py:111] Engine 000: Avg prompt throughput: 95.6 tokens/s, Avg generation throughput: 113.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hINFO: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1ININFO 01INFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14INFO 01-04 14:01:50 [loggers.py:111] Engine 000: Avg prompt throughput: 83.3 tokens/s, Avg generation throughput: 90.9 tokens/s, Running: 2 reqsINFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt throughput: 75.7 tokens/s, Avg generation throughput: 85.5 tokens/s, Running: 2 rINFO 01-04 14:02:0INFO 01-04 14:02:06 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt throughput: 81.INFO 01-04 14:02:07 [loggers.py:111] Engine 000: INFO 01-04 INFO 01-04 14:02:10 [loggers.py:111] Engine 000: Avg prompt throughput: 1INFO 01-0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 83 |
+
INFO 01-0INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throughput: 73.3 tokens/s, Avg generation throughput: 89.4INFO 01-0INFO 01-04 14:02:20 [loggers.py:111] Engine 000: Avg prompt throughput: INFO: INFO 01-04INFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput: 83.8 tokens/s, Avg generation throughput: 122.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usageINFO: INFO 01-04INFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 87.INFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt throughput: 118.8 tokens/s, Avg generation throughput: 81.1 tokens/s, RunINFO 01-04INFO 01-04 14:02:43 [loggers.py:111] Engine 000: Avg prompt throughput: 105.1 tokens/s, Avg generation throughput: 151.5 tokens/s,INFO 01-0INFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:02:50 [loggers.py:111] Engine 000: Avg prompt throughput: 101.4 tokens/s, Avg generation throughput: 91.6 tokens/s, Running: 2 reqsINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 INFO 01-04 14:02:56 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:03:00 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 70.3 tokens/s, Running: 1INFO 01-04 14:03:05 [loggers.py:111] Engine 000: Avg prompt throughput: 111.1 IINFO 01-04 14:03:07 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:03:10 [loggers.py:111] Engine 000: Avg prompt throughput: 67INFO 01-04 14:03:15 [loggers.py:111] Engine 000: Avg prompt throughput: 73.1 tokens/s, Avg generation throughput: 125.7 tokens/s, Running: 3 reINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 84 |
+
INFO 01-04 14:03:20 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:03:25 [loggers.py:111] Engine 000: Avg prompt throughput: 101.4 tokens/s, Avg generation throughput: 149.7 tokens/s, Running: 3 rIININFO 01-04 14:03:30 [loggers.py:111] Engine 000: Avg prompt throughput: 93.3 tokens/s, Avg generation throughput: 74.1 tokens/s, Running: 2INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO 01-0INFINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 86 |
+
INFO 01-04 14:03:40 [loggers.py:111] Engine 000: Avg prompt throughput: 119.4 tokens/s, Avg generation throughput: 105.1 tokens/s, Running: 2 reqs, WaIINFO 01-04 14:03:47 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 87 |
+
INFO 01-04 14:03:50 [loggers.py:111] Engine 000: Avg prompt throughput: 134.9 tokensINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"INFO: INFO 01-04 14:03:57 [loggers.py:111] Engine 000: Avg prompt throughput: 114.2 tokens/s, Avg generation throughput: 100.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cINFO 01-04 14:04:00 [loggers.py:111] Engine 000: Avg prompt throughput: 141.3 INFO 0INFO 01-04 14:04:07 [loggers.py:111] Engine 000: Avg prompt throughput: 160.8 tokens/s, Avg generation throughput: 90.1 tokens/s, RunnINFO 01-04 14:04:10 [loggers.py:111] Engine 000: Avg prompt throughput: 172.8 tokens/s, Avg generation throughput: 57.7 tokens/s, Running: 2 rINFO 01-04 14:04:15 [loggers.py:111] Engine 000: Avg prompt throughput: INININFO 01-04 14:04:20 [loggers.py:111] Engine 000: Avg prompt throughput: 120.0 tokens/s, Avg generation throughput: 100.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hitINFO: 10INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/INFO 01-0INFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:04:30 [loggers.py:111] Engine 000: Avg prompt throughput: 224.8 tokens/s, Avg generation throughput: 95.4 tokens/s, Running: 2 reqs,INFO INFO 01-04 14:04:37 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 88 |
+
INFO 01-04 14:04:40 [loggers.py:111] Engine 000: Avg prompt throughput: 174.6 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reIINFO 01-INFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6INFO: 10.45.190INFO 01-04 14:04:50 [loggers.py:111] Engine 000: Avg prompt throughput: 175.2INFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throINFO 01-INFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 89 |
+
INFO: 1INFO 01-04 14:05:03 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 90 |
+
INFO 01-04 14:05:07 [loggers.py:111] Engine 000: Avg prompt throughput: 138.2 tokens/s, Avg generation throughput: INFO 01-04 14:05:07 [lINFO 01-04 14:05:10 [loggers.py:111] Engine 000: Avg prompt throughput: 195.9 tokINFO 01-04 14:05:17 [loggers.py:111] Engine 000: Avg prompt throughput: 163.3 tokens/s, Avg generation throughput: 83.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usageINFO 01-04 14:05:17 [INFO 01-04 14:05:20 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 1INFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 147.4 tokens/s, Avg generation throughput:INFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 148.2 tokens/sINFO 01-04 14:05:3INFO: 10.43.30.5:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:05:37 [loggers.py:111] Engine 000: Avg prompt throughput: 121.6 tokens/s, Avg generation throughput: 103.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix caINFO 0INFO: 10.4INFO 01-04 14:05:47 [loggers.py:111] Engine 000: Avg prompt throughput: 144.5 tokens/s, Avg generation throughput: 114.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 91 |
+
INFO 01-04 14:05:50 [logINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 92 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 93 |
+
INFO 01-04 14:05:57 [loggers.py:111] Engine 000: Avg prompt throughput: 191.0 tokens/s, Avg generation throughput: 100.5 tokens/s, RunninINFOINFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 194.0 tINFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 209.2 tokens/s, Avg generation throughput: 76.9 tokens/s, Running: INFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 210.9 toINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO 01-04 14:06:17 [loggers.py:111] Engine 000: Avg prompt throughput: 226.9 tokens/s, Avg generation throughput: 88.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1INFO 01-04 14:06:20 [loggers.py:111] Engine 000: Avg prompt throughput: 505.5 tokens/s, Avg generaINFO 01-04 14:06:27 [loggers.py:111] Engine 000: Avg prompt throughput: 175.3 tokens/s, Avg generation throughput: 86.4 INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 95 |
+
INFO: 10.46.17.192:0 - "POST /vINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 96 |
+
INFO 01-04 14:06:37 [loggers.py:111] Engine 000: Avg prompt throughput: 178.1 tokens/s, Avg generation throughput: 107.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14INFO: 10.45.190.192:0 - "POST /v1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 97 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 98 |
+
INFO 01-04 14:06:47 [loggers.py:111] Engine 000: Avg prompt throughput: 244.1 tokens/s, Avg generation throughput: INFO 01-04 14:06:50 [loggers.py:111] Engine 000: Avg prompt throughput: 229.6 tokens/s, Avg generation throughput: 10.1 tokens/s, Running: 1 reqsINFO: 10.46.17.192:0INFO 01-04 14:06:57 [loggers.py:111] Engine 000: INFO 01-04 14:07:00 [loggers.py:111] Engine 000: Avg prompt throughput: 511.7 tokens/s, Avg generation throughput: 87.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:07:07 [loggers.py:111] Engine 000: Avg prompt throughput: 264.4 tokens/s, Avg generation throughput:INFO 01-04 14:07:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 99 |
+
INFO: 10.45.190.192:0 - "POST /v1/completiINFO: 10.46.17.19INFO 01-04 14:07:17 [loggers.py:111] Engine 0INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 100 |
+
INFO 01-04 14:07:20 [loggers.py:111] Engine 000: Avg prompt throughput: 570.5 tokens/s, Avg generation throughINFO: 10.43.30.4:0 - "POST /v1/comINFO 01-04 14:07:25 [loggeINFO 01-04 14:07:27 [loggers.py:111] Engine 00INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 101 |
+
INFO 01-04 14:07:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokeINFOINFO: 10.43.30.5:0 - INFO 01-04 14:07:37 [loggers.py:111] Engine 000: Avg prompt throughput: 250.0 tokens/s, Avg generation throughput: 64.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:07:40 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 102 |
+
INFO: 10.43.30.5:0 - "POST /v1/coINFO 01-04 14:07:45 [loggerINFO 01-04 14:07:47 [loggers.py:111] Engine 000: Avg prompt throughput: 201.9 tokens/s, Avg generation thrINFO 01-04 14:07:50 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:07:55 [loggerINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 103 |
+
INFO 01-04 14:07:57 [loggers.py:111] EngINFO 01-04 14:08:00 [loggers.py:111] Engine 000: Avg prompt throughput: 412.5 tokens/s, Avg generation throughput: 79.1 tokens/s, Running: 3 reqs, WaitiINFO: 10.43.30.4:0 - "PINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO 01-04 14:08:10 [loggers.py:111] Engine 000: Avg prompt throughput: 160.1 tokens/s, Avg generation throughput: 102.0 tokens/s, Running: 3 reqs, WaitINFO: 10.46.50.192INFO: 10.46.50.192:0 - "POST /v1/completINFO 01-04 14:08:13 [lINFO 01-04 14:08:17 [loggers.py:111] Engine 000INFO 01-04 14:08:20 [loggers.py:111] Engine 000: Avg prompt throughput: 309.3 tokens/s, Avg generation throughput: 107.1 tokens/s, Running: 3 reqs, WINFO: 10.43.30.3:0 INFO 01-04 14:08:27 [loggers.py:111] Engine 000: Avg prompt throughput: 274.5 tokens/s, Avg generation throughput:INFO 01-04 14:08:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 104 |
+
INFO: 10.46.17.192:0 - "POST /v1/comINFO 01-04 14:08:35 [loggeINFO 01-04 14:08:37 [loggers.py:111] Engine 000: AINFO 01-04 14:08:40 [loggers.py:111] Engine 000: Avg prompt throughput: 281.3 tokens/s, Avg generation throughput: 87.1 tokens/s, Running: 2 reqsINFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 334.6 tokens/s, AvINFO 01-04 14:08:47 [loggers.py:111] Engine 000: AINFO 01-04 14:08:50 [loggers.py:111] Engine 000: Avg prompt throughput: 484.9 tokens/s, Avg generation throughput: 93.2 tokens/s, Running: 3 reqINFO 01-04 14:08:53 [loggINFO: 10.43.30.4:0 - "POST /v1/coINFO: 10.46.50.192:0 - INFO 01-04 14:08:57 [loggers.py:111] Engine 000: Avg prompt throughput: 349.3 tokens/s, Avg generation throughput: 52.8 INFO 01-04 14:09:00 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:09:03 [loggerINFO 01-04 14:09:07 [loggers.py:111] Engine 000: Avg prompt throughput: 224.3 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO:INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 341.7 tokens/s, Avg geINFO 01-04 14:09:17 [loggers.py:111] Engine 000: Avg prompt throughput: 169.0 tokens/s, Avg generation throughput: 54.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.INFOINFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 409.8 tokens/s, Avg geneINFO 01-04 14:09:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 70.7 tINFINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 265.8 tokens/s, Avg generINFO 01-04 14:09:37 [loggers.py:111] Engine 000: Avg prompt throughput: 268.1 tokens/s, Avg generation throughput: 25.3INFINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 106 |
+
INFO 01-04 14:09:43 [loggers.py:1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 107 |
+
INFO 01-04 14:09:47 [loggers.py:111] Engine 000: Avg prompt throughput: 188.7 tokens/s, Avg generation throughput: 68.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 209.5 tokens/s, Avg generatioINFO 01-04 14:09:57 [loggers.py:111] Engine 000: Avg prompt throughput: 450.7 tokens/s, Avg generation throughput: 55.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-04 14:10:03 [loggers.py:111] EINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 108 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 109 |
+
INFO 01-04 14:10:07 [loggers.py:111] Engine 000: AINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 110 |
+
INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 336.4 tokens/s, Avg generation INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 111 |
+
INFO 01-04 14:10:17 [loggers.py:111] Engine 000: Avg prompt throughput: 386.2 tokens/s, Avg generation throughput:INFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 253.9 tokens/s, Avg generation thINFO 01-04 14:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 228.2 tokens/s, Avg generation throughput: 37.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:10:33 [loggers.py:111] EngINFO 01-04 14:10:37 [loggers.py:111] Engine 000: Avg prompt throughput: 340.4 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache uINFO:INFO 01-04 14:10:40 [loggers.py:111]INFO: 10.46.50.192:0 -INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 112 |
+
INFO 01-04 14:10:43 [loggers.py:111] EnginINFO 01-04 14:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 38INFO 01-04 14:10:50 [loggers.py:111] Engine 000: Avg prompt tINFO: 10.46.50.192:0 - "POST /v1/compINFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.INFO 01-04 14:11:00 [loggers.py:111] Engine 000: Avg prompt throughput: 193.1 tokens/s, Avg generatioINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO 01-04 14:11:07 [loggers.py:111] Engine 000: AvgINFO 01-04 14:11:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.0 tokeINFO 01-04 14:11:13 [loggers.py:111] Engine 0INFO 01-04 14:11:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.43.30.5:0 - "POST /v1/completioINFO 01-04 14:11:47 [loggers.py:111] Engine 000: Avg prompt throughput: 424.8 tokens/s, Avg generation throughput: 37.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 13.3%
|
| 114 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 115 |
+
INFO 01-04 14:11:57 [loggers.py:111] Engine 000: Avg prompt throughput: 307.4 tokens/s, Avg generation throughput: 16.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:12:03 [loggers.py:111] Engine 000: AvINFO 01-04 14:12:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:12:13 [loggers.py:111] Engine 000: AINFO 01-04 14:12:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:12:23 [loggers.py:111] Engine 000: AvINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 116 |
+
INFO 01-04 14:12:27 [loggers.py:111] Engine 000: Avg prompt throughput: 517.1 tokens/s, Avg generation throughput: 102.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPINFO 01-04 14:12:33 [loggers.py:111] Engine 000: Avg INFO 01-04 14:12:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 12.4%
|
| 117 |
+
INFO: 10.INFO: 10.46.50.192:0 - "POST /v1/completions HTTPINFO: 10.INFO 01-04 14:12:43 [loggers.py:111] Engine 000: Avg INFO 01-04 14:12:47 [loggers.py:111] Engine 000: Avg prompt throughput: 293.8 tokens/s, Avg generation throughput: 52.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:12:53 [loggers.py:111] Engine 000: Avg pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 118 |
+
INFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, INFO 01-04 14:13:03 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 225.2 tokens/s, Avg generation throughput: 16.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:13:13 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 215.6 tokens/s, Avg generation throughput: 70.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.INFO: INFO 01-04 14:13:23 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 13.2%
|
| 119 |
+
INFO: 10INFO 01-04 14:13:33 [loggers.py:111] Engine 000: Avg pINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 120 |
+
INFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 662.8 tokens/s, Avg generINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 121 |
+
INFO 01-04 14:13:43 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 294.4 tokens/s, Avg generation throughput: 58.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:13:53 [loggers.py:111] Engine 000: Avg prompt throughput: 412.0 tokens/s, Avg generation throughput: 80.9 toINFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 122 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 123 |
+
INFO 01-04 14:14:03 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 995.4 tokens/s, Avg generation throughput: 60.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 124 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 125 |
+
INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 437.8 tokens/s, Avg generation INFO 01-04 14:14:23 [loggers.py:111] Engine 000: Avg prompt throughput: 556.7 tokens/s, Avg generation throughput: 1INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 293.1 tokens/s, Avg generation tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 126 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 127 |
+
INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 382.0 tokens/s, Avg generation throughput: 89.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 12.1%
|
| 128 |
+
INFO: 10.46.1INFO: 10.43.30.4:0 - "POST /v1/completions HTINFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 519.8 tokens/s, Avg generation throughput: 68.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 12.8%
|
| 129 |
+
INFO: 10.46INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 130 |
+
INFO 01-04 14:14:53 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 328.9 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:15:03 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 266.3 tokens/s, Avg generation throughput: 60.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 12.3%
|
| 131 |
+
INFO: INFO 01-04 14:15:13 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 277.3 tokens/s, Avg generation throughput: 84.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 12.1%
|
| 132 |
+
INFO: INFO 01-04 14:15:23 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 217.5 tokens/s, Avg generation throughput: 101.9 tokens/s, Running: 2 reqs, Waiting: INFO 01-04 14:15:33 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 133 |
+
INFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg prompt throughput: 472.3 tokens/s, Avg generation throughput: 107.2 tokens/s, Running: 2 reqs, WaitiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 134 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 135 |
+
INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 443.4 tokens/s, Avg generation throughput: 113.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 13.8%
|
| 136 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 137 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 138 |
+
INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 688.7 tokens/s, Avg generation throughput: 112.5 tokens/s, Running: 1 reqs, WaitiINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 139 |
+
IINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 140 |
+
INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 682.3 tokens/s, Avg generation throughput: 80.1 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:16:13 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.6 tokens/s, Running: 2 reqs, WaINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 141 |
+
INFO 01-04 14:16:23 [loggers.py:111] Engine 000: Avg prompt throughput: 442.8 tokens/s, Avg generation throughput: 55.0 tokens/s, RunnINFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 396.1 tokens/INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 142 |
+
INFO 01-04 14:16:33 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 455.7 tokens/s, Avg generation throughput: 76.7 tokens/s, Running: 2 reqs, WaINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 143 |
+
INFO 01-04 14:16:43 [loggers.py:111] Engine 000: Avg prompt throughput: 736.4 tokens/s, Avg generation throughput: 67.5 tokens/s, RunniINFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 371.7 tokenINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 144 |
+
INFO 01-04 14:16:53 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 706.2 tokens/s, Avg generation throughput: 78.8 tokens/s, Running: 2 reqs, INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 145 |
+
INFO 01-INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 146 |
+
INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 375.2 tokens/s, Avg generation throughput: 95.7 tokens/s, Running: 2 reqs, INFO 01-04 14:17:13 [loggers.py:111] Engine 000: Avg prompt throughput: 587.3 tokens/s, Avg generation throughput: 48.8 tokens/s, Running: INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 509.2 toINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 147 |
+
INFO 01-04 14:17:23 [loggers.py:111] Engine 000: Avg prompt throughput: 299INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 148 |
+
INFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 728.8 INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 149 |
+
INFO 01-04 14:17:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 261.6 tokens/s, Avg generation throughput: 109.0 tokens/s, Running: 3 INFO 01-04 14:17:43 [loggers.py:111] Engine 000: Avg prompt throughput: 526.8 INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 150 |
+
INFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 66INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 151 |
+
INFO 01-04 14:17:53 [loggers.py:111] Engine 000: Avg prompt throughput: 520.6 tokens/s, Avg generation throughput: 106.3 tokens/s, Running: 2 reqINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 152 |
+
INFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughput: 68INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 154 |
+
INFO 01-04 14:18:03 [loggers.py:111] Engine 000: Avg prompt throughput: 424.7 tokens/s, Avg generation throughput: 60.6 tokens/s, Running: 0 reqINFO 01-04 14:18:07 [loggers.py:111] Engine 000: Avg prompt throughput: 293INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 155 |
+
INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughput: 717.0 tokens/s, Avg generation throughput: 23.1 tokens/s, Running: 0 rINFO 01-04 14:18:17 [loggers.py:111] Engine 000: Avg prompt throughput: 523.IINFO 01-04 14:18:23 [loggers.py:111] Engine 000: Avg prompt throughput: 406.4 tokens/s, Avg generation throughput: 4.8 tokens/s, Running: 1 reINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 156 |
+
INFO 01-04INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 157 |
+
INFO 01-04 14:18:33 [loggers.py:111] Engine 000: Avg prompt throughput: 543.4 tokens/s, Avg generation throughput: 48.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit raINFO 01-0INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 158 |
+
INFO 01-04 14:18:43 [loggers.py:111] Engine 000: Avg prompt throughput: 531.7 tokens/s, Avg generation throughput: 56.5 tokens/s, Running: 1 reqINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 159 |
+
INFO 01-04INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 160 |
+
INFO 01-04 14:18:53 [loggers.py:111] Engine 000: Avg prompt throughput: 655.8 tokens/s, Avg generation throughput: 44.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit raINFO 01-0INFO 01-04 14:19:03 [loggers.py:111] Engine 000: Avg prompt throughput: 624.2 tokens/s, Avg generation throughput: 62.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit ratINFO: INFO 01-04 14:19:13 [loggers.py:111] Engine 000: Avg prompt throughput: 702.0 tokens/s, Avg generation throughput: 117.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 10.0%
|
| 161 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:19:23 [loggers.py:111] Engine 000: Avg prompt throughput: 710.5 tokens/s, Avg generation throughput: 115.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.8%, Prefix cache hit rate: 9.7%
|
| 162 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 163 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"INFO 01-INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 164 |
+
INFO 01-04 14:19:33 [loggers.py:111] Engine 000: Avg prompt throughput: 356.9 tokens/s, Avg generation throughput: 68.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rateINFO 01INFO 01-04 14:19:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 9INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 165 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 166 |
+
INFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughput: 549.1 tokens/s, Avg generation throughput: 45.5 tokens/s, Running: 0 reqs, WINFO 01-04 14:19:57 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 580.3 tokens/s, Avg generation throughput: 27.8 tokens/s, Running: 1 reqs, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 167 |
+
INFO INFO 01-04 14:20:13 [loggers.py:111] Engine 000: Avg prompt throughput: 510.5 tokens/s, Avg generation throughput: 63.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 9INFO 01-04 14:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1"INFO 01-04 14:20:23 [loggers.py:111] Engine 000: Avg prompt throughput: 472.4 tokens/s, Avg generation throughput: 91.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: INFO INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 168 |
+
INFO 01-04 14:20:33 [loggers.py:111] Engine 000: Avg prompt throughput: 724.7 tokens/s, Avg generation throughput: 108.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 8.8ININFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 169 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 170 |
+
INFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throughput: 567.8 tokens/s, Avg generation throughput: 74.5 tokens/s, Running: 1 reqs, WaINFO 01-04 14:20:47 [loggers.py:111] Engine 000: Avg prompt throughput: 500.0 tokens/s, Avg generation throughput: 88.6 tokens/s, RunniINFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 278.0 tokensINFO 01-04 14:20:57 [loggers.py:111] Engine 000: Avg prompt throughput: 799.4 tokens/s, Avg generation throughput: 95.8 tokens/s, RunniINFO 01-04 14:21:03 [loggers.py:111] Engine 000: Avg prompt throughput: 307.5 tokens/s, Avg generation throughput: 67.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 9.9INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
ININFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 172 |
+
INFO 01-04 14:21:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.7 tokens/s, Running: 0 reqs, Waiting:INFO 01-04 14:21:17 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:21:23 [loggers.py:111] Engine 000: Avg prompt throughput: 639.5 tokens/s, Avg generation throughput: 40.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 9.7%
|
| 173 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 174 |
+
INFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg prompt throughput: 431.7 tokens/s, Avg generation throughput: 52.3 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:21:37 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:21:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 9.6%
|
| 175 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 176 |
+
INFO 01-04 14:21:53 [loggers.py:111] Engine 000: Avg prompt throughput: 533.9 tokens/s, Avg generation throughput: 73.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 9.4%
|
| 177 |
+
INFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt throughput: 376.2 tokens/s, Avg generation throughput: 69.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 9.3%
|
| 178 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 179 |
+
INFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 72.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 9.3%
|
| 180 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 181 |
+
INFO 01-04 14:22:23 [loggers.py:111] Engine 000: Avg prompt throughput: 502.6 tokens/s, Avg generation throughput: 63.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 9.2%
|
| 182 |
+
INFO: INFO 01-04 14:22:27 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:22:33 [loggers.py:111] Engine 000: Avg prompt throughput: 740.4 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:22:37 [loggers.py:111] Engine 000: Avg prompt throughput: 845.2 tokens/s, Avg generation throughput: 72.8INFO 01-04 14:22:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO 01-04 14:22:47 [loggers.py:111] Engine 000: AvgINFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg prompt throughput: 697.8 tokens/s, Avg generation throughput: 0.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 8.9%
|
| 184 |
+
INFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 8.9%
|
| 185 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 186 |
+
INFO 01-04 14:23:13 [loggers.py:111] Engine 000: Avg prompt throughput: 830.0 tokens/s, Avg generation throughput: 30.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 8.7%
|
| 187 |
+
INFO 01-04 14:23:23 [loggers.py:111] Engine 000: Avg prompt throughput: 188.6 tokens/s, Avg generation throughput: 84.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 8.6%
|
| 188 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 189 |
+
INFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 822.6 tokens/s, Avg generation throughput: 95.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 8.6%
|
| 190 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 191 |
+
INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 641.4 tokens/s, Avg generation throughput: 86.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 8.4%
|
| 192 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 193 |
+
INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 483.3 tokens/s, Avg generation throughput: 93.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 8.3%
|
| 194 |
+
INFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 496.9 tokens/s, Avg generation throughput: 100.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.1%, Prefix cache hit rate: 8.2%
|
| 195 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 196 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 197 |
+
INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 111.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 8.2%
|
| 198 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 199 |
+
INFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1449.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache hit rate: 8.0%
|
| 200 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 201 |
+
INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 8.0%
|
| 202 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 203 |
+
INFO 01-04 14:24:43 [loggers.py:111] Engine 000: Avg prompt throughput: 581.9 tokens/s, Avg generation throughput: 49.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:24:47 [loggers.py:111] Engine 00INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 204 |
+
INFO 01-04 14:24:53 [loggers.py:111] Engine 000: Avg prompt throughput: 759.9 tokens/s, Avg generation throuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 205 |
+
INFO 01-04 14:24:57 [loggers.py:111] Engine INFO 01-04 14:25:03 [loggers.py:111] Engine 000: Avg prompt throughput: 728.7 tokens/s, Avg generation throughput: 50.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 7.6%
|
| 206 |
+
INFO: 10.46.17.192INFO 01-04 14:25:07 [loggers.py:111] Engine INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 207 |
+
INFO 01-04 14:25:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughINFO 01-04 14:25:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughINFO 01-04 14:25:23 [loggers.py:111] Engine 000: Avg prompt throughput: 686.7 tokens/s, Avg generation throughput: 36.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:25:27 [loggers.py:111] EnginINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 208 |
+
INFO 01-04 14:25:33 [loggers.py:111] Engine 000: Avg prompt throughput: 893.4 tokens/s, Avg generation throughpINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 209 |
+
INFO 01-04 14:25:37 [loggers.py:111] EngINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 210 |
+
INFO 01-04 14:25:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: INFO 01-04 14:25:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:25:53 [loggers.py:111] Engine 000: Avg prompt throughput: 353.3 tokens/s, Avg generation throughput:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 211 |
+
INFO 01-04 14:25:57 [loggers.py:111] EngINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 212 |
+
INFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg prompt throughput: 358.7 tokens/s, Avg generation throughputINFO 01-04 14:26:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO 01-04 14:26:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 213 |
+
INFO 01-04 14:26:17 [loggers.py:111] EINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 214 |
+
INFO 01-04 14:26:23 [loggers.py:111] Engine 000: Avg prompt throughput: 786.4 tokens/s, Avg generation throughput: INFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:26:33 [loggers.py:111] Engine 000: Avg prompt throughput: 885.5 tokens/s, Avg generation throughput:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 215 |
+
INFO 01-04 14:26:37 [loggers.py:111] Engine 000: Avg prompt throughput: 210.6 tokens/s, Avg generation thINFO 01-04 14:26:43 [loggers.py:111] Engine 000: Avg prompt throughput: 816.2 tokens/s, Avg generation throughput:INFO 01-04 14:26:47 [loggers.py:111] Engine 000: Avg prompt throughput: 546.2 tokens/s, Avg generation throughput: 55.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:26:53 [loggers.py:111] Engine 000: AvINFO 01-04 14:26:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:27:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tINFO 01-04 14:27:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 216 |
+
INFO 01-04 14:27:13 [loggers.py:111] Engine 000: Avg INFO 01-04 14:27:17 [loggers.py:111] Engine 000: Avg prompt throughput: 643.6 tokens/s, Avg generation throughput: 89.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 217 |
+
INFO 01-04 14:27:23 [loggers.py:111] Engine 000: AvINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 218 |
+
INFO 01-04 14:27:27 [loggers.py:111] Engine 000: Avg prompt throughput: 655.0 tokens/s, Avg generation INFO 01-04 14:27:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 219 |
+
INFO 01-04 14:27:37 [loggers.py:11INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 220 |
+
INFO 01-04 14:27:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.2INFO 01-04 14:27:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatioINFO 01-04 14:27:53 [loggers.py:111] Engine 000: Avg prompt throughput: 673.6 tokens/s, Avg generation throughput: 29INFO 01-04 14:27:57 [loggers.py:111] Engine 000: Avg prompt throughput: 895.3 tokens/s, Avg generatioINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 221 |
+
INFO 01-04 14:28:03 [loggers.py:111] Engine 000: AINFO 01-04 14:28:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:28:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.INFO 01-04 14:28:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO 01-04 14:28:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1382.2 tokens/s, Avg generation throughput: 3INFO 01-04 14:28:27 [loggers.py:111] Engine 000: Avg prompt throughput: 985.3 tokens/s, Avg generation throughput: 35.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:28:33 [loggers.py:111] Engine 000: AINFO 01-04 14:28:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:28:43 [loggers.py:111] Engine 000: Avg prompt throughput: 686.6 tokens/s, Avg generation throughput: 3INFO 01-04 14:28:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:28:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46INFO 01-04 14:28:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1087.4 tokens/s, Avg generatiINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 222 |
+
INFO 01-04 14:29:03 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 666.7 tokens/s, Avg generation throughput: 44.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:29:13 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.0 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO 01-04 14:29:23 [loggers.py:111] Engine 000: Avg promINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 223 |
+
INFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1102.8 tokens/s, Avg geINFO 01-04 14:29:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, PreINFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO 01-04 14:29:43 [loggers.py:111] Engine 000: Avg prompt throughput: 877.5 tokens/s, Avg generation throughput: 63.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prompt throughput: 481.5 tokens/s, Avg generation throughpuINFO 01-04 14:29:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 60.3 tokensINFO 01-04 14:29:57 [loggers.py:111] Engine 000: AINFO 01-04 14:29:57 [loggers.py:111] EnginINFO 01-04 14:30:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg prompt throughput: 886.6 tokens/s, Avg generation throuINFO 01-04 14:30:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughputINFO 01-04 14:30:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:30:23 [loggers.py:111] Engine 000: Avg prompt throughput: 459.3 tokens/s, Avg generation throughputINFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 224 |
+
INFO 01-04 14:30:33 [loggers.py:111] Engine 00INFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompt throughput: 662.1 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:30:43 [loggers.py:111] Engine 000: Avg prompt throughput: 899.9 tokens/s, Avg generation throughputINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 225 |
+
INFO 01-04 14:30:47 [loggers.py:111] EngiINFO 01-04 14:30:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, RuINFO 01-04 14:30:57 [loggers.py:111] Engine 00INFO 01-04 14:30:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:31:03 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.43.30.4:0 - "POST /v1/completioINFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg prompt throughput: 731.0 tokens/s, Avg generation INFO 01-04 14:31:13 [loggers.py:111] Engine 000: Avg prompt throughput: 520.4 tokens/s, Avg generation throughput: 1INFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:31:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunnINFO 01-04 14:31:27 [loggers.py:111] Engine 000: INFO 01-04 14:31:27 [loggers.py:111INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 226 |
+
INFO 01-04 14:31:33 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:31:37 [loggers.py:111] Engine 000:INFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt throughput: 434.5 tokens/s, Avg generatiINFO 01-04 14:31:43 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:31:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, WINFO 01-04 14:31:53 [loggers.py:111] Engine 000: Avg prompt throughput: 962.1 tokens/s, Avg generation throughput: 34.4 INFO: 10.46.5INFO 01-04 14:31:57 [loggers.py:111] Engine 000: INFO 01-04 14:31:57 [loggers.py:1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 227 |
+
INFO 01-04 14:32:03 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.4:0 - "POST /v1/completions INFO 01-04 14:32:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatioINFO 01-04 14:32:13 [loggers.py:111] Engine 000: Avg prompt throughput: 597.0 tokens/s, Avg generation throughput: 39INFO 01-04 14:32:37 [loggers.py:111] Engine 000: Avg prompt throughput: 694.9 tokens/s, Avg generation throughput: 26.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.3%
|
| 228 |
+
INFO 01-04 14:3INFO 01-04 14:32:57 [loggers.py:111] Engine 000: Avg prompt throughput: 843.1 tokens/s, Avg generation throughput: 18.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 13.7%
|
| 229 |
+
INFO 01-04 14:33:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 13.7%
|
| 230 |
+
INFO 01-04 14:33:17 [loggers.py:111] Engine 000: Avg prompt throughput: 1062.2 tokens/s, Avg generation throughput: 87.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.1%, Prefix cache hit rate: 13.5%
|
| 231 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 232 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 233 |
+
INFO 01-04 14:33:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 55.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cacINFO 01-04 14:33:27INFO 01-04 14:33:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:34:03 [loggers.py:111] Engine 000: Avg prompt throughput: 770.6 tokens/s, Avg generation throughput:INFO 01-04 14:34:07INFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 497.2 tokens/s, Avg generation throughput: 36.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 13.4%
|
| 234 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 235 |
+
INFO 01-04 14:34:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 5.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.4%
|
| 236 |
+
INFO 01-04 14:35:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.4%
|
| 237 |
+
INFO 01-04 14:36:17 [loggers.py:111] Engine 000: Avg prompt throughput: 927.3 tokens/s, Avg generation throughput: 9.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 14.4%
|
| 238 |
+
INFO 01-04 14:36:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 14.4%
|
| 239 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 240 |
+
INFO 01-04 14:36:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
|
| 241 |
+
INFO 01-04 14:36:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
|
| 242 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 243 |
+
INFO 01-04 14:37:37 [loggers.py:111] Engine 000: Avg prompt throughput: 1222.8 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
|
| 244 |
+
INFO 01-04 14:37:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
|
| 245 |
+
INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1079.6 tokens/s, Avg generation throughput: 1.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 13.9%
|
| 246 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 247 |
+
INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.9%
|
| 248 |
+
INFO 01-04 14:39:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.9%
|
| 249 |
+
INFO 01-04 14:41:07 [loggers.py:111] Engine 000: Avg prompt throughput: 1279.1 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 14.6%
|
| 250 |
+
INFO 01-04 14:41:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 14.6%
|
| 251 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 252 |
+
INFO 01-04 14:41:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.6%
|
| 253 |
+
INFO 01-04 14:41:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.6%
|
| 254 |
+
INFO 01-04 14:42:27 [loggers.py:111] Engine 000: Avg prompt throughput: 524.2 tokens/s, Avg generation throughput: 32.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 14.5%
|
| 255 |
+
INFO 01-04 14:42:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 14.5%
|
| 256 |
+
INFO 01-04 14:42:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 14.5%
|
| 257 |
+
INFO 01-04 14:42:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.5%
|
| 258 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 259 |
+
INFO 01-04 14:43:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
|
| 260 |
+
INFO 01-04 14:43:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
|
| 261 |
+
INFO 01-04 14:45:47 [loggers.py:111] Engine 000: Avg prompt throughput: 456.2 tokens/s, Avg generation throughput: 40.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 14.4%
|
| 262 |
+
INFO 01-04 14:45:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 14.4%
|
| 263 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 264 |
+
INFO 01-04 14:46:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
|
| 265 |
+
INFO 01-04 14:46:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
|
| 266 |
+
INFO 01-04 14:51:47 [loggers.py:111] Engine 000: Avg prompt throughput: 519.4 tokens/s, Avg generation throughput: 19.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 15.0%
|
| 267 |
+
INFO 01-04 14:51:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 15.0%
|
| 268 |
+
INFO 01-04 14:52:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 15.0%
|
| 269 |
+
INFO 01-04 14:52:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 15.0%
|
| 270 |
+
INFO 01-04 14:52:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.0%
|
| 271 |
+
INFO 01-04 14:52:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 15.0%
|
| 272 |
+
INFO 01-04 14:52:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 15.0%
|
| 273 |
+
INFO 01-04 14:52:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 15.0%
|
| 274 |
+
INFO 01-04 14:53:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix INFO 01-04 14:58:43 [loggers.py:111] Engine 000: Avg prompt throughput: 459.2 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 11.6%
|
| 275 |
+
INFO 01-04 14:58:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 11.6%
|
| 276 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 277 |
+
INFO 01-04 14:59:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 34.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
|
| 278 |
+
INFO 01-04 14:59:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
|
| 279 |
+
00: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 15.0%
|
| 280 |
+
INFO 01-04 14:54:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 15.0%
|
| 281 |
+
INFO: INFO 01-04 15:08:45 [loggers.py:111] Engine 000: Avg prompt throughput: 536.7 tokens/s, Avg generation throughput: 20.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.8%
|
| 282 |
+
INFO 01-04 15:08:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 18.8%
|
| 283 |
+
INFO 01-04 15:09:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, WaitiINFO 01-04 15:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 547.7 tokens/s, Avg generation throughput: 10.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 16.9%
|
| 284 |
+
INFO 01-04 15:09:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.9%
|
| 285 |
+
INFO 01-04 15:10:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.9%
|
| 286 |
+
INFO 01-04 15:10:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.9%
|
| 287 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 288 |
+
INFO 01-04 15:10:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.9%
|
| 289 |
+
INFO 01-04 15:10:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.9%
|
| 290 |
+
tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.2%
|
| 291 |
+
cache hit rate: 20.2%
|
| 292 |
+
INFO 01-04 14:45:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 20.2%
|
| 293 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 294 |
+
INFO 01-04 14:46:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.2%
|
| 295 |
+
INFO 01-04 14:46:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.2%
|
| 296 |
+
INFO 01-04 14:47:26 [loggers.py:111] Engine 000: Avg prompt throughput: 826.0 tokens/s, Avg generation throughput: 26.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 20.0%
|
| 297 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 298 |
+
INFO 01-04 14:47:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.0%
|
| 299 |
+
INFO 01-04 14:47:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.0%
|
| 300 |
+
INFO 01-04 14:57:46 [loggers.py:111] Engine 000: Avg prompt throughput: 561.0 tokens/s, Avg generation throughput: 26.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 20.3%
|
| 301 |
+
INFO 01-04 14:57:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 20.3%
|
| 302 |
+
INFO 01-04 14:58:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 20.3%
|
| 303 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 304 |
+
INFO 01-04 14:58:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.3%
|
| 305 |
+
INFO 01-04 14:58:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.3%
|
hf_ip/vllm_gpu5.log
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:39 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:42 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:42 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8006, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:50 [config.py:717] This model supports multiple tasks: {'reward', 'score', 'embed', 'generate', 'classify'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:50 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:13:55 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:13:58 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:13:58 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f9660db2860>
|
| 13 |
+
INFO 01-04 13:13:59 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:13:59 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:13:59 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:13:59 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 100.81 seconds
|
| 35 |
+
INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 101.065262 secondININFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 36 |
+
INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.50 s
|
| 37 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later usININFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.44 ININFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.94 s in total
|
| 38 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 39 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94ININFO 01-04 13:18:33 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
|
| 40 |
+
INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.95 seconds
|
| 41 |
+
INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
|
| 42 |
+
WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 43 |
+
INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 44 |
+
INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 45 |
+
INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8006
|
| 46 |
+
INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
|
| 47 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
|
| 48 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: HEAD, GET
|
| 49 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
|
| 50 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
|
| 51 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
|
| 52 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
|
| 53 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: POST, GET
|
| 54 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 55 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 56 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
|
| 58 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 61 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
|
| 62 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
|
| 70 |
+
INFO: Started server process [1317730]
|
| 71 |
+
INFO: Waiting for application startup.
|
| 72 |
+
INFO: Application startup compIININFO 01-04 14:00:01 [loggers.py:111] Engine 000: Avg prompt throughput: 55.5 tokens/s, Avg generation throughput: 37.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.IIIINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 73 |
+
INFO 01-04 14:00:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 0 reqs, WaitINFO 01-04 14:00:15 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:00:21 [loggers.py:111] Engine 000: Avg prompt throughput: 51.9 tokens/s, Avg generation throughput: 5.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.2%, Prefix cache hit rate: 0.0%IINFO 01-04 14:00:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.0%
|
| 74 |
+
IINFOINFO 01-04 14:00:36 [loggers.py:111] Engine 000: Avg INFO 01-04 14:00:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs,ININFO 01-04 14:00:46 [loggers.py:111] Engine 000: Avg INFO 01-04 14:00:51 [loggers.py:111] Engine 000: Avg prompt throughput: 137.8 tokens/s, Avg generation throughput: 82.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GINFO 01-04 14:00:56 [loggers.py:111] Engine 000: Avg INFO 01-04 14:01:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 141.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 0.0%
|
| 75 |
+
INFO: INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1"INFO: ININFO 01-04 14:01:06 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:01:11 [loggers.py:111] Engine 000: Avg prompt throughput: 62.2 tokens/s, Avg generation throughput: 71.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/INFO 01-04 14:01:21 [loggers.py:111] Engine 000: Avg prompt throughput: 83.6 tokens/s, Avg generation throughput: 110.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 0.0%
|
| 76 |
+
INFO: INFO 01-04 14:01:26 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:01:31 [loggers.py:111] Engine 000: Avg prompt throughput: 66.5 tokens/s, Avg generation throughput: 133.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 0.4%
|
| 77 |
+
INFO:INFOINFO: 10.43.30.5:0 - "POST /v1/completions HTTPINFOINFO 01-04 14:01:41 [loggers.py:111] Engine 000: Avg prompt throughput: 102.8 tokens/s, Avg generation throughput: 108.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate:INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 78 |
+
INFO 01-04 14:01:51 [loggers.py:111] Engine 000: Avg prompt throughput: 91.3 tokens/s, Avg generation throughput: 149.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate:INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt througINFOINFO 01-04 14:02:01 [loggers.py:111] Engine 000: Avg prompt throughput: 98.0 tokens/s, Avg generation throughput: 106.6 tokens/s, Running: 3 reqs, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 79 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 80 |
+
INFO 01INFO 01-04 14:02:11 [loggers.py:111] Engine 000: Avg prompt throughput: 93.4 tokens/s, Avg generation throughput: 102.9 tokens/s, Running: 3 reqs, Waiting: 0 INFO 01-04 14:02:16 [loggers.py:111] Engine 000: Avg INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throughpuININFO 01-04 14:02:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geneINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1INFO: INFINFO 01-04 14:02:26 [loggers.py:111] Engine 000: Avg ININFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:02:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 73.1 tokens/s, Running: 1 reqs, INFO: INFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg prompINFINFO 01-04 14:02:41 [loggers.py:111] Engine 000: Avg prompt throughput: 132.5 tokens/s, Avg generation throughput: 83.6 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:02:45 [loggers.py:111] Engine 000: Avg prompt tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 81 |
+
INFO 01-04 14:02:51 [loggers.py:111] Engine 000: Avg prompt throughput: 116.2 tokens/s, Avg generation throughput: 102.9 tokens/s, Running: 2 reqs, WaitingINFO 01-04 14:02:56 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 82 |
+
INFO 01-04 14:03:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 79.8 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:03:06 [loggers.py:111] Engine 000: Avg prompINFO 0INFO 01-04 14:03:11 [loggers.py:111] Engine 000: Avg prompt throughput: 83.3 tokens/s, Avg generation throughput: 81.7 tokens/s, Running: 2 reqs, Waiting:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200INFOINFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg pINFO 0INFO 01-04 14:03:21 [loggers.py:111] Engine 000: Avg prompt throughput: 176.4 tokens/s, Avg generation throughput: 70.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 7.3%
|
| 83 |
+
INFO: IINFO 01-04 14:03:26 [loggers.py:111] Engine 000: AvINFO 0INFO 01-04 14:03:31 [loggers.py:111] Engine 000: Avg prompt throughput: 79.2 tokens/s, Avg generation throughput: 71.9 tokens/s, Running: 2 reqs, Waiting: 0 reINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 2INFO: INFINFO 01-04 14:03:36 [loggers.py:111] Engine 000: Avg INFOINFO 01-04 14:03:41 [loggers.py:111] Engine 000: Avg prompt throughput: 118.3 tokens/s, Avg generation throughput: 90.1 tokens/s, Running: 2 reqs, Waiting: 0 rIINFO 01-04 14:03:46 [loggers.py:111] Engine 000: Avg prompt throughput: 110.7 tokens/s, Avg generation throughput: 56.INFOINFO 01-04 14:03:51 [loggers.py:111] Engine 000: Avg prompt throughput: 144.2 tokens/s, Avg geneINFO 01-04 14:03:56 [loggers.py:111] Engine 000: Avg prompt throughput: 143.6 tokens/s, Avg generation throughput: 114.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: INFINFO:INFO: 10.46.17.192:0 -INFO: 10.46.50.192:0 - "POST /vINFO 01-04 14:04:01 [loggers.py:111] Engine 000: Avg prompt throughput: 78.9 tokens/s, Avg generINFO 01-04 14:04:05 [loggers.py:111] Engine 000: Avg prompt throughput: 107.3 tokens/s, Avg generation throughput: 127.9 tINFO 01-04 14:04:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:04:16 [loggers.py:111] Engine 000: Avg prompt throughput: 117.2 tokens/s, Avg generation throughput: 107.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: ININFO 01-04 14:04:21 [loggers.py:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 84 |
+
INFO 01-04 14:04:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 140.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usaINFO INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO 01-04 14:04:31 [loggers.py:111INFO 01-04 14:04:36 [loggers.py:111] Engine 000: Avg prompt throughput: 145.7 tokens/s, Avg generation throughput: 100.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usINFOINFO 01-04 14:04:41 [loggers.py:111INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 86 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 87 |
+
INFO 01-04 14:04:46 [loggers.py:111] Engine 000: Avg prompt throughput: 188.8 tokens/s, Avg generation throughINFOINFO 01-04 14:04:51 [loggers.py:111] Engine 000: Avg prompt throughput: 125.8 tokens/s, Avg generation thINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 0INFO 01-04 14:04:56 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 88 |
+
INFO 01-04 14:05:01 [loggers.py:111] Engine 000: Avg prompt throughput: 93.6 tokens/s, Avg generation thrINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 89 |
+
INFO 01-04 14:05:06 [loggers.py:111] Engine 00INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 90 |
+
INFO 01-04 14:05:11 [loggers.py:111] Engine 000: Avg prompt throughput: 202.6 tokens/s, Avg generation thrINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 91 |
+
INFO 01-04 14:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 205.8 tokens/s, Avg generation throughpuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 92 |
+
INFO 01-04 14:05:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-INFO 01-04 14:05:26 [loggers.py:111] Engine 000: Avg prompt throughput: 181.3 tokens/s, Avg generation throughINFO 01-04 14:05:31 [loggers.py:111] Engine 000: Avg prompt throughput: 248.7 tokens/s, Avg generaINFO: INFO: 10.45.190.192:0 - "POST /v1/INFO: 10.43.30.3:0 - "POSINFO 01-04 14:05:36 [loggers.py:111] EngineINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 93 |
+
INFO 01-04 14:05:41 [loggers.py:111] Engine 000: Avg prompt throughput: 128.7 tokens/s, Avg generation INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO 01-04INFO 01-04 14:05:46 [loggers.py:111] EngiINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 95 |
+
INFO 01-04 14:05:51 [loggers.py:111] Engine 000: Avg prompt throughput: 161.7 tokens/s, Avg generatioINFO 01-04INFO 01-04 14:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 96 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 97 |
+
INFO 01-04 14:06:01 [loggers.py:1INFO: INFO 01-04 14:06:06 [loggers.py:111] Engine 000: Avg prompt throughput: 294.6 tokens/s, Avg generation throughput: 75.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.46.50.192:0 - "POST /v1/compleINFO: 10.46.50.192:INFO 01-04 14:06:11 [loggers.pyINFO 01-04 1INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 98 |
+
INFO 01-04 14:06:16 [loggers.py:111] Engine 000: Avg prompt throughput: 207.7 tokens/s, Avg generation througINFO 01-04 14:06:21 [loggers.py:111] Engine 000: Avg prompt throughput: 375.8 tokens/s, Avg generINFO 01-04 14:06:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:06:26 [loggers.py:111] Engine 000: Avg prompt throughput: 216.5 tokens/s, Avg generation throughpINFO 01-04 14:06:31 [loggers.pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 99 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 100 |
+
INFO 01-04 14:INFO 01-04 14:06:36 [loggers.py:111] Engine 000: AvINFO 01-04 14:06:36 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:06:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughINFO 01-04 14:06:46 [loggers.py:111] Engine 000: Avg prompt throughput: 233.9 tokens/s, Avg generation throughput: 67.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:06:51 [loggers.py:111] Engine 000: Avg prompt throughput: 312.3 tokens/s, Avg generation througINFO 01-04 14:06:56 [loggers.py:111] Engine 000: Avg prompt throughput: 241.7 tokens/s, Avg generation throughINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 101 |
+
INFO 01-04 14:07:01 [loggers.py:111] Engine 000: Avg prompt throughput: 210.1 tokens/s, Avg generation throuINFO 01-04 14:07:06 [loggers.py:111] Engine 000: Avg prompt throughput: 208.1 tokens/s, Avg generation throughpINFO 01-04 14:07:06 [loggers.py:111] Engine 000: Avg prompt tINFINFO: 10.46.17.192:0 - "POST /v1/compleINFO: 10.46.17.192:INFO 01-04 14:07:11 [loggers.py:INFO: 1INFO 01-04 14:07:16 [loggers.py:111] Engine 000: Avg prompt throughput: 204.3 tokens/s, Avg generation throughpuINFO 01-04INFO 01-04 14:07:18 [loggers.py:111] Engine 000: Avg INFO 01-04 14:07:21 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 102 |
+
INFO 01-04 14:07:26 [loggers.py:111] Engine 000INFO 01-04 14:07:26 [loggers.py:111] Engine 000: Avg prompt throughput: 177.INFO: 10.46.17.192:0 - "POST /v1/completions INFO 01-04 14:07:31 [loggers.py:111] Engine INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 103 |
+
INFO 01-04 14:07:36 [loggers.py:111] Engine 000: Avg prompt throughput: 522.4 tokens/s, Avg generation thrINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 104 |
+
INFO 01-04 14:07:41 [loggers.py:111] Engine 000: Avg prompt throughput: 328.3 tokens/s, AvgINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO 01-04 14:07:43 [INFO 01-04 14:07:46 [loggers.py:111] Engine 000INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"ININFO 01-04 14:07:51 [loggers.py:111] Engine 000: Avg prompt throughput: 208.1 tokens/s, Avg generation throughpINFO: 10.45.190.192:0 - "POST /v1/completINFO 01-04 14:07:53 [lINFO 01-04 14:07:56 [loggers.py:111] EngiINFO 01-04 14:08:01 [loggers.py:111] Engine 000: Avg prompt throughput: 506.5 tokens/s, Avg generation throughput: 74.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 14.5%
|
| 106 |
+
INFO: 10.46.50.192:0INFO 01-04 14:08:06 [loggers.py:111] Engine 000: Avg prompt throughput: 303.7 tokens/s, Avg generation throuINFO 01-04 14:08:11 [loggers.py:111] Engine 000: Avg prompt throughput: 299.0 tokens/s, INFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 307.5 tokens/s, INFO 01-04 14:08:16 [loggers.py:111] Engine 000: Avg prompt throughput: 224.7 tokens/s, Avg generationINFO 01-INFO: 10.45INFO INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 107 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 108 |
+
INFO 01-04 14:08:23 [logINFO 01-04 14:08:26 [loggers.py:111] Engine 000: Avg prompt throughput: 211.0 tokens/s, Avg generatINFO 01-04 14:08:27 [loggerINFO 01-04 14:08:28 [loggers.py:111] Engine 000: INFO 01-04 14:08:31INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 257.0 tokens/s, AINFO 01-04 14:08:36 [loggers.py:111] Engine 000: Avg prompt throughput: 331.1 tokens/s, Avg generatioINFO 01-INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 109 |
+
INFO 01-04 14:08:41 INFO 01-04 14:08:43 [loggeINFO 01-04 14:08:46 [loggers.py:111] Engine 000: Avg prompt throughput: 416.6 tokens/s, Avg generation throughput: 60.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:08:51 [loggers.py:111] Engine 000: Avg prompt throughput: 571.5 tokens/s, Avg generation throughpuINFO 01-04 14:08:56 [loggers.py:111] Engine 000: Avg prompt throughput: 386.5 tokens/s, Avg generation throughput: 73.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:09:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:09:03 [loggersINFO: 10.46.50.192:0 - "POST /v1/completionsINFO 01-04 14:09:0INFO 01-04 14:09:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:09:11 [loggers.py:111] Engine 000: Avg prompt throughput: 436.0 tokensINFO 01-04 14:09:13 [loggersINFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg prompt throughput: 177.9 tokens/s, Avg generation throughput:INFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:09:21 [loggers.py:111] Engine 000INFO: 10.45.190.192:0 - "POST /vINFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 339.9 tINFO: 10.43.INFO 01-04 14:09:26 [loggers.py:111] EnINFO 01-04 14:09:31 [loggers.py:111] Engine 000: Avg prompt throughput: 183.5 tokensINFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 300.8 tokens/s, Avg geneINFO 01-04 14:09:36 [loggers.py:111] Engine 000: Avg prompt throughput: 255.0 tokens/s, Avg generation INFO 01-04 14:09:41 [loggers.py:111] Engine 000: Avg prompt throughput: 624.3 tokenINFO 01-04 14:09:43 [loggers.py:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 110 |
+
INFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 263.6 tokens/s, Avg generation throughput: 5INFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg pINFO: 10.4INFO: 10.43.30.3:0 - "POST /v1/INFO 01-04 14:09:56 [loggers.py:111] Engine 000: Avg prompt throughput: 248.6 tokens/s, Avg generation throughput: 62.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usaINFO: 10.46.17.192:0 - "POST /v1/coINFO 01-04 14:10:06 [loggers.py:111] Engine 000: Avg prompt throughput: 445.9 tokens/s, Avg generation throughput: 105.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 9.8%
|
| 111 |
+
INFO: 10.46.17.192:0 - INFO: 10.46.17.192:0 - "POST /v1/coINFO: 10.46.50.192:0 - INFO 01-04 14:10:13 [loggers.py:111] EnINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 112 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO 01-04 14:10:16 [loggers.py:111] INFO 01-04INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 114 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 115 |
+
INFO 01-04 14:10:23 [loggers.py:111] EngiINFO 01-04 14:10:26 [loggers.py:111] Engine 000: Avg prompt throughput: 401.7 tokens/s, Avg generatioINFO 01-04 14:10:31 [loggers.py:111] Engine 000: Avg prompt throughput: 393.8 tokens/s, Avg generation throughput: 66.INFO: 10.43.30.3:0 - "INFO 01-04 14:10:33 [loggers.py:111] EINFO 01-04 14:10:36 [loggers.py:111] INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 116 |
+
INFO 01-04 14:10:41 [loggers.py:111] Engine 000: Avg prompt throughput: 240.5INFO: 10.46.17.192:0 - "POST /v1/coINFO 01-04 14:10:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:10:51 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: INFO 01-04 14:10:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KVINFO: 10.46.50.192:0 - "POST /v1/completions INFO 01-04 14:11:06 [loggers.py:111] Engine 000: Avg prompt throughput: 401.6 tokens/s, Avg generation throughput: 79.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:11:06 [loggers.py:111] EINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 117 |
+
INFO 01-04 14:11:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 79.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:11:21 [loggers.py:111] Engine 000: AINFO: 10.43.30.3:INFO 01-04 14:11:43 [loggers.py:111] Engine 000: Avg prompt throughput: 317.7 tokens/s, Avg generation throughput: 18.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 8.8%
|
| 118 |
+
INFO 01-04 14:11:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 8.8%
|
| 119 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 120 |
+
INFO 01-04 14:12:03 [loggers.py:111] Engine 000: Avg prompt throughput: 324.5 tokens/s, Avg generation throughput: 64.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 8.5%
|
| 121 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 122 |
+
INFO 01-04 14:12:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 8.5%
|
| 123 |
+
INFO 01-04 14:12:23 [loggers.py:111] Engine 000: Avg prompt throughput: 367.4 tokens/s, Avg generation throughput: 29.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 8.2%
|
| 124 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 125 |
+
INFO 01-04 14:12:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 8INFOINFO 01-04 14:12:43 [loggers.py:111] Engine 000: Avg prompt throughput: 590.1 tokens/s, Avg generation throughput: 57.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 7.8%
|
| 126 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 127 |
+
INFO 01-04 14:12:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 7.INFINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 128 |
+
INFO 01-04 14:13:03 [loggers.py:111] Engine 000: Avg prompt throughput: 340.6 tokens/s, Avg generation throughput: 41.0 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:13:13 [loggers.py:111] Engine 000: Avg prompt throughput: 415.2 tokens/s, Avg generation throughput: 57.6 tokens/s, Running: 2 reqs, WaiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 129 |
+
INFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 130 |
+
INFO 01-04 14:13:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 369.0 tokens/s, Avg generation throughput: 37.7 tokens/s, RINFO 01-04 14:13:33 [loggers.py:111] Engine 000: Avg prompt throughput: 292.2 tokens/s, Avg generation throughput: 13.8 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 131 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 132 |
+
INFO 01-04 14:13:43 [loggINFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 520.6 tokens/s, Avg generation throughput: 59.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, PreINFO 01-04 14:13:53 [loggers.py:111] Engine 000: Avg prompt throughput: 255.1 tokens/s, Avg generation throughput: 23.3 tokens/s, Running: 1 reqs, Waiting: 0 INFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 133 |
+
INFO 01-04 14:14:03 [loggers.py:111] Engine 000: Avg prompt throughput: 410.0 tokens/s, Avg genINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 134 |
+
INFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:14:13 [loggers.py:111] Engine 000: Avg prompt throughput: 406.3 tokens/s, Avg generation throughput: 72.7 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 382.7 tokens/s, Avg generation throughput: 35.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, INFO 01-04 14:14:23 [loggers.INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 666.4 tokens/s, Avg generation throughput: 94.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%,INFO 01-04 14:14:33 [loggers.py:111] Engine 000: Avg prompt throughput: 565.0 tokens/s, Avg generation throughput: 85.4 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg promINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 135 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 136 |
+
INFO 01-04 14:14:43 [loggers.py:111] Engine 000: Avg prompt throughput: 713.0 tokens/s, Avg generINFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 506.8 tokens/s, Avg generation throughput: 69.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%,INFO: 10.46.50.192:0 - "POINFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 605.6 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 10.8%
|
| 137 |
+
INFO: 10.46.50.192:0 - "POST /INFO 01-04 14:15:03 [loggers.py:111] Engine 000: Avg prompt throughput: 521.6 tokens/s, Avg geneINFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 390.4 tokens/s, Avg generation throughput: 111.3 toINFO 01-04 14:15:13 [loggers.py:111] Engine 000: Avg prompt throughput: 337.7 tokens/s, Avg generation throughput: 72.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 138 |
+
INFO 01-04 14:15:23 [loggers.py:111] Engine 000: Avg prompt throughput: 991.8 tokens/s, Avg generation throughput: 109.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 9.7%
|
| 139 |
+
INFO: 1INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 140 |
+
INFO 01-04 14:15:33 [loggers.py:111] Engine 000: Avg prompt throughput: 209.0 tokens/s, Avg generation throughput: 82.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 141 |
+
INFO 01-04 14:15:43 [loggers.py:111] Engine 000: Avg prompt throughput: 343.3 tokens/s, Avg generation throughput: 105.4 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 284.9 tokens/s, Avg generation throughput: 42.2 toINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 142 |
+
INFO 01-04 14:15:53 [loggers.pyINFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 343.0 tokens/s, Avg generation throughput: 58.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9INFO 01-04 14:16:03 [loggers.py:INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 287.0 tokens/s, Avg generation throughput: 100.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4INFO 01-04 14:16:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 676.6 tokens/s, Avg generation throughput: INFO 01-04 14:16:23 [loggers.py:111] Engine 000: Avg prompt throughput: 501.5 tokens/s, Avg generation throughput: 105.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 143 |
+
INFO 01-04 14:16:27 [loggers.py:111] Engine 000: AvINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 144 |
+
INFO 01-04 14:16:33 [loggers.py:111] Engine 000: Avg prompt throughput: 657.6 tokens/s, Avg generation INFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 87.INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 145 |
+
INFO: 10.45.190.192:0 - "POST /INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 146 |
+
INFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 489.5 tokens/s, Avg generation throughput: 119.6 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO 01-04 14:16:46 [INFO 01-04 14:16:53 [loggers.py:111]INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 863.9 tokens/s, Avg generation throughput: 145.6 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usagINFO: 10.43.30.5:0 - "POST /v1/comINFO: 10.46.17.192:0 -INFO 01-04 14:17:03 [loggers.py:111] Engine 000: Avg prompt throughput: 542.5 tokens/s, Avg generation throughput: 74.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:17:07 [loggers.py:111] Engine 00INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 147 |
+
INFO 01-04 14:17:13 [loggers.py:111] Engine 000: Avg prompt throughput: 515.9 tokens/s, Avg generation throughput: 73.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacINFO: 10.43.30.4:0 - "INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 517.0 tokenINFO 01-04 14:17:23 [loggers.py:111] Engine 000: Avg prompt throughput: 624.5 tokens/s, Avg generation throughput: 56.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:17:27 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 148 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 149 |
+
INFO 01-04 14:17:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 150 |
+
INFO 01-04 14:17:37 [loggers.py:111] Engine INFO 01-04 14:17:43 [loggers.py:111] Engine 000: Avg prompt throughput: 452.7 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 530.9 tokens/s, Avg generation throuINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 151 |
+
INFO 01-04 14:17:53 [loggers.py:111] Engine 000: Avg prompt throughput: 441.2 tokens/s, Avg generation throughputINFO: INFO: 10.46.50INFO 01-04 14:17:56 [loggers.py:111] EngINFO 01-04 14:17:57 [loggers.py:111] INFINFO 01-04 14:18:03 [loggers.py:111] Engine 000: Avg prompt throughput: 517.5 tokens/s, Avg generation throughput: 24.8 tokens/s, Running: INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 152 |
+
INFO 01-04 14:INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughput: 617.8 tokens/s, Avg generation throughput: 65.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:18:17 [loggers.py:111] EINFO 01-04 14:18:23 [loggers.py:111] Engine 000: Avg prompt throughput: 426.4 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 12.1%
|
| 154 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 155 |
+
INFO 01-04 14:18:33 [loggers.py:111] Engine 000: Avg prompt throughput: 429.2 tokens/s, Avg generation throughput: 74.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6INFO: 10.46.50.192:0 - "POSTINFO 01-04 14:18:43 [loggers.py:111] Engine 000: Avg prompt throughput: 668.4 tokens/s, Avg generation throughput: 93.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.6%, Prefix cache hit rate: 12.5%
|
| 156 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 157 |
+
INFO 01-04 14:18:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 120.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 12.5%
|
| 158 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 159 |
+
INFO 01-04 14:19:03 [loggers.py:111] Engine 000: Avg prompt throughput: 1429.8 tokens/s, Avg generation throughput: 101.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.8%, Prefix cache hit rate: 13.0%
|
| 160 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 161 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 162 |
+
INFO 01-04 14:19:13 [loggers.py:111] Engine 000: Avg prompt throughput: 580.7 tokens/s, Avg generation throughput: 115.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 12.7%
|
| 163 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 164 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 165 |
+
INFO 01-04 14:19:23 [loggers.py:111] Engine 000: Avg prompt throughput: 664.3 tokens/s, Avg generation throughput: 77.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2INFO 01-04 14:19:25 [loggers.py:11INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 166 |
+
INFO 01-04 14:19:33 [loggers.py:111] Engine 000: Avg prompt throughput: 837.0 tokens/s, Avg generation throughput: 50.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO: 10.43.30.3:0 - "POST /v1/complINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 167 |
+
INFO 01-04 14:19:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.4 tINFO 01-04 14:19:45 INFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1119.2INFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughput: 595.3 tokens/s, Avg generation throughput: 18.2 tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 168 |
+
INFO 01-04 14:19:55INFO 01-04 14INFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 760.4 tokens/s, Avg generation throughput: 63.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6INFO 01-04 14:20:05 [loggers.py:INFO: 10.43.30.5:0 - "PINFO 01-04 14:20:07 [loggINFO 01-04 1INFO 01-04 14:20:13 [loggers.py:111] Engine 000: Avg prompt throughput: 804.7 tokens/s, Avg generation throughput: 65.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.4%INFO 01-04 14:20:15 [loggers.py:111] Engine 000: Avg prompt throughput: 334.0 tokens/s, Avg generINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 169 |
+
INFO 01-04 14:20:23 [loggers.py:111] Engine 000: Avg prompt throughput: 204.6 tokens/s, Avg generation throughput: 60.3 tokenINFO 01-04 14:20:25 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:20:27 [loggers.py:11INFO 01-04 14:20:33 [loggers.py:111] Engine 000: Avg prompt throughput: 437.5 tokens/s, Avg generation throughput: 65.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, PrINFO 01-04 14:20:35 [loggerINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 170 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
INFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:20:45 [loggers.py:111] Engine 000: Avg prompt throughput: 622.2 tokens/s, Avg generation throughput: 62.4 tokens/s, Running: 2 reqs, Waiting:INFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 559.0 tokens/s, Avg generation throughput: 55.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, PrefINFO 01-04 14:20:55 [loggINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 172 |
+
INFO 01-04 14:21:03 [loggers.py:111] Engine 000: Avg prompt throughput: 632.5 tokens/s, Avg generation throughput: 64.5 tokens/sINFO 01-04 14:21:05 [loggers.py:111] Engine 000: Avg prompt throughput: 512.3 tokens/s, AvgINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 173 |
+
INFO 01-04 14:21:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 73.2 tokens/s, RINFO 01-04 14:21:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO: 10.43.30.5:0 - "POST /v1/cINFO 01-04 14:21:17 [loggersINFO 01-04 14:21:23 [loggers.py:111] Engine 000: Avg prompt throuINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 174 |
+
INFO 01-04 14:21:25 [loggers.py:111] Engine 000: Avg prompt INFO: 10.43.30.5:0 - "POINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 175 |
+
INFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg prompt throughput: 423.7 tokens/s, Avg generation throughput: 25.1 tokens/s, INFO 01-04 14:21:35 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:21:37 [loggers.pyINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 176 |
+
INFO 01-04 14:21:43 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 177 |
+
INFO 01-04 14:21:45 [loggers.py:111] Engine 000: Avg prompt throughput: 566.0 tokensINFO 0INFO 01-04 14:21:53 [loggers.py:111] Engine 000: Avg prompt throughput: 600.6 tokens/s, Avg generation throughput: 38.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, PreINFO 01-04 14:21:55 [loggeINFO: 10.46.50.192:0 - "POST /v1INFO: 10.46.17.192:0 - "POINFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt throughput: 313.1 tokens/s, Avg generation throughput: 34.4 tokens/INFO 01-04 14:22:05 [loggers.py:111] Engine 000: Avg prompt throughput: 671.4 tokens/s, Avg INFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, PrefiINFO 01-04 14:22:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO: INFO 01-04 14:22:23 [loggers.py:111] Engine 000: Avg prompt throughput: 679.5 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, PrefINFO 01-04 14:22:25 [loggers.py:111] Engine 000: Avg prompt throughput: 902.9 tokens/s, Avg INFO 01-04 14:22:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.4 tokens/s, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 178 |
+
INFO 01-04 14:22:35 [lINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 179 |
+
INFO 01-04 14:22:43 [loggers.py:111] Engine 000: Avg prompt throughput: 773.5 tokens/s, Avg generation throughput: 21.2 tokens/s, RINFO 01-04 14:22:45 [loggers.py:111] Engine 000: Avg prompt throughput: 664.9 tokens/s, INFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunnINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 180 |
+
INFO 01-04 14:22:55 [INFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 794.3 tokens/s, Avg generation throughput: 42.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cINFO 01-04 14:23:05 [INFO 01-04 14:23:13 [loggers.py:111] Engine 000: Avg prompt throughput: 561.7 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, PrefixINFO 01-04 14:23:15 [loINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 181 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 182 |
+
INFO 01-04 14:23:23 [loggers.py:111] Engine 000: Avg prompt throughput: 767.6 tokens/s, Avg generation throughput: 89.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cachINFO 01-04 14:23:2INFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 509.8 tokens/s, Avg generation throughput: 76.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cacINFO 01-04 14:23:35 [loggers.py:111] Engine 000: Avg prompt throughput: 702.6 tokens/INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 505.9 tokens/s, Avg generation throughput: 48.9 tokens/s, RunnINFO 01-04 14:23:45 [loggers.py:111] Engine 000: Avg prompt throughput: 765.8 tokens/INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 540.5 tokens/s, Avg generation throughput: 48.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cacheINFO 01-04 14:23:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 184 |
+
INFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 810.6 tokens/s, Avg generation throughput: 75.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hINFO 01-04 14:2INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 185 |
+
INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hINFO: 10.46INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 186 |
+
INFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg prompt throughput: 870.9 tokens/s, Avg generation throughput: 46.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 16.5%
|
| 187 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 188 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 189 |
+
INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg prompt throughput: 772.6 tokens/s, Avg generation throughput: 58.9 tokens/s, Running: 0 reINFO 01-04 14:24:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:24:43 [loggers.py:111] Engine 000: Avg prompt throughput: 511.3 tokens/s, Avg generation throughput: 27.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit INFO 01-04 1INFO 01-04 14:24:53 [loggers.py:111] Engine 000: Avg prompt throughput: 758.4 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit INFO 01-04 1INFO 01-04 14:25:03 [loggers.py:111] Engine 000: Avg prompt throughput: 539.0 tokens/s, Avg generation throughput: 91.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.8%, Prefix cache hiINFO 01-04 14:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 190 |
+
INFO 01-04 14:25:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 108.4 tokens/s, Running: 2INFO 01-04 14:25:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 191 |
+
INFO 01-04 14:25:23 [loggers.py:111] Engine 000: Avg prompt throughput: 826.9 tokens/s, Avg generation throughput: 113.8 tokens/s, RunningINFO 01-04 14:25:25 [loggers.py:111] Engine 000: Avg prompt throughput: 777.0 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 192 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 193 |
+
INFO 01INFO 01-04 14:25:35 [loggers.py:111] Engine 000: Avg prompt throughput: 747.1 tokens/s, Avg generation throughput: 102.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.2%, Prefix cache hiINFO: INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 194 |
+
INFO 01-04 14:25:43 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:25:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 111.7 tokens/s, Running: 1 reINFINFO 01-04 14:25:53 [loggers.py:111] Engine 000: Avg prompt throughput: 6INFO 01-04 14:25:55 [loggers.py:111] Engine 000: Avg prompt throughput: 545.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 2 reqsINFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reINFO 01-04 14:26:05 [loggers.py:111] Engine 000: Avg prompt throughput: 377.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 195 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 196 |
+
INFO 01-04 14:26:13 [loggers.py:111] Engine 000: Avg prompt throughput: 48INFO 01-04 14:26:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 50.2 tokens/s, Running: 1 rINFO INFO 01-04 14:26:23 [loggers.py:111] Engine 000: Avg prompt throughput: 251.7 tokens/s, Avg generation throughput: 41.4 tokens/s, Running:INFO 01-04 14:26:25 [loggers.py:111] Engine 000: Avg prompt throughput: 671.8 tokINFO 01-04 14:26:33 [loggers.py:111] Engine 000: Avg prompt throughput: 505.7 tokens/s, Avg generation throughput: 55.5 tokens/s, Running:INFO 01-04 14:26:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:26:43 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:26:45 [loggers.py:111] Engine 000: Avg prompt throughput: 606.1 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 2INFO 01INFO 01-04 14:26:53 [loggers.py:111] Engine 000: Avg prompt throughput: 950.0 tokens/s, Avg generation throughput: 60.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache INFO 01-04 14:26INFO 01-04 14:27:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache INFO 01-04 14:27:05 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:27:13 [loggers.py:111] Engine 000: Avg prompt throughput: 649.2 tokens/s, Avg generation throughput: 44.7 tokens/s, RunningINFO 01-04 14:27:15 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 INFO 01-04 14:27:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache INFO 01-04 14:27INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:27:33 [loggers.py:111] Engine 000: Avg prompt throughput: 896.8 tokens/s, Avg generation throughput: 51.0 tokens/s, RunningINFO 01-04 14:27:35 [loggers.py:111] Engine 000: Avg prompt throughput: 3INFO 01-0INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 197 |
+
INFO 01-04 14:27:43 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:27:45 [loggers.py:111] Engine 000: Avg prompt throughput: 842.8 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: INFO 01-INFO 01-04 14:27:53 [loggers.py:111] Engine 000: Avg prompt throughput: 596.3 tokens/s, Avg generation throughput: 21.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hiINFO 01-04 14:INFO 01-04 14:28:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hiINFO 01-04 14:INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 198 |
+
INFO 01-04 14:28:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit ratININFO 01INFO 01-04 14:28:23 [loggers.py:111] Engine 000: Avg prompt throughput: 453.7 tokens/s, Avg generation throughput: 34.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit ratININFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 199 |
+
INFO 01INFO 01-04 14:28:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 1INFO INFO 01-04 14:28:43 [loggers.py:111] Engine 000: Avg prompt throughput: 566.7 tokens/s, Avg generation throughput: 13.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 17.6%
|
| 200 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 201 |
+
INFO 01-04 14:28:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 21.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.6%
|
| 202 |
+
INFO 01-04 14:29:03 [loggers.py:111] Engine 000: Avg prompt throughput: 659.1 tokens/s, Avg generation throughput: 25.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.1%
|
| 203 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 0INFO 01-04 14:29:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 14.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 1INFO 01-04 14:29:25 [loggers.py:111] Engine 000: Avg prompt throughput: 686.4 tokens/s, Avg generation throughput: 30.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 22.1%
|
| 204 |
+
INFO 01-04 14:29:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 22.1%
|
| 205 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 206 |
+
INFO 01-04 14:29:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 22.1%
|
| 207 |
+
IINFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 709.1 tokens/s, Avg generation throughput: 0.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.6%IINFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 16.6%IINFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 16.6%IINFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 645.4 tokens/s, Avg generation throughput: 61.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, Prefix cache hit rate: 16.4%
|
| 208 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 209 |
+
INFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.4%
|
| 210 |
+
IINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 24.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
|
| 211 |
+
INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
|
| 212 |
+
INFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 663.8 tokens/s, Avg generation throughput: 13.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 17.3%
|
| 213 |
+
INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:31:23 [loggers.py:111] Engine 000: Avg prompt throughput: 518.0 tokens/s, Avg generation throughput: 39.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rINFO 01-04 INFO 01-04 14:31:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit INFO 01-04 1INFO 01-04 14:31:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hitINFO 01-04 14INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 214 |
+
INFO 01-04 14:31:53 [loggers.py:111] Engine 000: Avg prompt throughput: 914.5 tokens/s, Avg generation throughput: 50.9 tokens/s, Running: 1 rINFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 215 |
+
INFO 01-04 14:32:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 34.7 tokens/s, Running: INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 927.5 toINFO 01-04 14:32:13 [loggers.py:111] Engine 000: Avg prompt throughput: 777.9 tokens/s, Avg generation throughput: 24.0 tokens/s, Running: 1 reINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 14:32INFO 01-04 14:32:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cacheINFO 01-04 14:32:INFO 01-04 14:32:33 [loggersINFO 01-04 14:32:37 [loggers.py:111] Engine 000: Avg prompt throughput: 560.4 tokens/s, Avg generation throughput: 0.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 611.1 tokens/s, Avg generation throughput: 2.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.9%
|
| 216 |
+
INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.9%
|
| 217 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 218 |
+
INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.9%
|
| 219 |
+
INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.9%
|
| 220 |
+
INFO 01-04 14:33:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1091.1 tokens/INFO 01-04 14:34:03 [loggers.py:111] Engine 000: Avg prompt throughput: 497.1 tokens/s, Avg generation throughput: 15.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 17.9%
|
| 221 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 222 |
+
INFO 01-04 14:34:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 18.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:34:17 [loggers.py:111] EnINFO 01-04 14:34:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:34:27 [loggers.py:111] EINFO 01-04 14:34:33 [loggers.py:111] Engine 000: Avg prompt throughput: 1340.9 tokens/s, Avg generation throughput: 41.7 tokens/s, RINFO 01-04 14:34:46 [loggers.py:111] Engine 000: Avg prompt throughput: 514.1 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 19.0%
|
| 223 |
+
INFO 01-04INFO 01-04 14:35:08 [loggers.py:111] Engine 000: Avg prompt throughput: 854.2 tokens/s, Avg generation throughput: 46.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 17.0%
|
| 224 |
+
INFO 01-04 14:35:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 17.0%
|
| 225 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200INFO 01-04 14:35:35 [loggers.py:111] Engine 000: Avg prompt throughput: 958.6INFO 01-04 14:36:03 [loggers.py:111] Engine 000: Avg prompt throughput: 627.6 tokens/s, Avg generation throughput: 27.8 tokens/s, RunINFO 01-04 14:36:16 [loggers.py:111] Engine 000: Avg prompt throughput: 677.4 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 19.8%
|
| 226 |
+
INFO 01-04 14:36:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 19.8%
|
| 227 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 228 |
+
INFO 01-04 14:36:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.8%
|
| 229 |
+
INFO 01-04 14:36:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO 01-04 14:37:03 [loggers.py:111] Engine 000: Avg prompt throughput: 535.8 tokens/s, Avg generation throughput: 31.4 tokens/s, RuINFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 504.0 tokens/s,INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 230 |
+
INFO 01-04 14:37:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.1 tokens/s, RunninINFO 01-04 14:37INFO 01-04 14:38:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1079.4 tokens/s, Avg generation throughput: 33.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 18.2%
|
| 231 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO 01-04 14:38:33 [loggers.py:111] Engine 000: Avg prompt throughput: 573.9 tokens/s, Avg generation throughput: 21.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 17.8%
|
| 232 |
+
INFO 01-04 14:38:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.8INFO 01-04 14:39:26 [loggers.py:111] Engine 000: Avg prompt throughput: 380.6 tokens/s, Avg generation throughput: 9.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 18.1%
|
| 233 |
+
INFO 01-04 14:39:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 18.1%
|
| 234 |
+
INFO 01-04 14:39:46 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:40:03 [loggers.py:111] Engine 000: Avg prompt throughput: 489.2 tokens/s, Avg generation throughput: 38.8 tokens/s, RunnINFO 01-04 14:40:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1203.8 tokensINFO 01-04 14:40:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunninINFO 01-04 14:40:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:40:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunninINFO 01-04 14:40:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO 01-04 14:41:16 [loggers.py:111] Engine 000: Avg prompt throughput: 506.6 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.0%
|
| 235 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 236 |
+
INFO 01-04 14:41:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 15.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
|
| 237 |
+
INFO 01-04 14:41:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running:INFO 01-04 14:42:46 [loggers.py:111] Engine 000: Avg prompt throughput: 952.5 tokens/s, Avg generation throughput: 39.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 20.5%
|
| 238 |
+
INFO 01-04 14:42:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 20.5%
|
| 239 |
+
INFO 01-04 14:43:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 20.5%
|
| 240 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 241 |
+
INFO 01-04 14:43:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 40.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
|
| 242 |
+
INFO 01-04 14:43:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO 01-04 14:44:23 [loggers.py:111] Engine 000: Avg prompt throughput: 652.5 tokens/s, Avg generation throughput: 1.4 tokens/s, RunninINFO 01-04 14:46:36 [loggers.py:111] Engine 000: Avg prompt throughput: 571.7 tokens/s, Avg generation throughput: 28.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 20.4%
|
| 243 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 244 |
+
INFO 01-04 14:46:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.4%
|
| 245 |
+
INFO 01-04 14:46:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.4%
|
| 246 |
+
INFO 01-04 14:54:36 [loggers.py:111] Engine 000: Avg prompt throughput: 426.4 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 20.2%
|
| 247 |
+
INFO 01-04 14:54:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:55:35 [loggers.py:111] Engine 000: Avg prompt throughput: 431.5 tokens/s, Avg generation throughput: 15.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 25.8%
|
| 248 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 249 |
+
INFO 01-04 14:55:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 29.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 25.8%
|
| 250 |
+
INFO 01-04 14:55:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 25.8%
|
| 251 |
+
Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 20.2%
|
| 252 |
+
INFO 01-04 14:55:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 20.2%
|
| 253 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 254 |
+
INFO 01-04 14:55:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 15:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 612.3 tokens/s, Avg generation throughput: 38.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.8%
|
| 255 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 256 |
+
INFO 01-04 15:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 37.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
|
| 257 |
+
INFO 01-04 15:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
|
| 258 |
+
ning: 1 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:55:51 [loggers.py:111] Engine 000: Avg prompt throughput: 437.3 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.5%
|
| 259 |
+
INFO 01-04 14:56:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.5%
|
| 260 |
+
INFO 01-04 14:56:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.5%
|
| 261 |
+
INFO 01-04 14:56:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.5%
|
| 262 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 263 |
+
INFO 01-04 14:56:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
|
| 264 |
+
INFO 01-04 14:56:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
|
| 265 |
+
6 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 18.2%
|
| 266 |
+
INFO 01-04 14:55:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput:INFO 01-04 15:07:05 [loggers.py:111] Engine 000: Avg prompt throughput: 518.9 tokens/s, Avg generation throughput: 5.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 19.0%
|
| 267 |
+
INFO 01-04 15:07:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 19.0%
|
| 268 |
+
INFO 01-04 15:07:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 19.0%
|
| 269 |
+
INFO 01-04 15:07:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 19.0%
|
| 270 |
+
INFO 01-04 15:07:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 19.0%
|
| 271 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 272 |
+
INFO 01-04 15:07:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 5.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.0%
|
| 273 |
+
INFO 01-04 15:08:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.0%
|
hf_ip/vllm_gpu6.log
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:42 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:45 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:45 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8007, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:53 [config.py:717] This model supports multiple tasks: {'classify', 'generate', 'embed', 'reward', 'score'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:53 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:13:58 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:14:01 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:14:01 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7faff3c54e80>
|
| 13 |
+
INFO 01-04 13:14:03 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:14:03 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:14:03 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:14:03 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 97.35 seconds
|
| 35 |
+
INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 97.798136 seconds
|
| 36 |
+
INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 37 |
+
INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.48 s
|
| 38 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
|
| 39 |
+
INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.24 s
|
| 40 |
+
INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.72 s in total
|
| 41 |
+
INFO 01-04 13:17:53 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 42 |
+
INFO 01-04 13:17:53 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
|
| 43 |
+
INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
|
| 44 |
+
INFO 01-04 13:18:32 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.40 seconds
|
| 45 |
+
INFO 01-04 13:18:32 [core_client.py:439] Core engine process 0 ready.
|
| 46 |
+
WARNING 01-04 13:18:32 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 47 |
+
INFO 01-04 13:18:32 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 48 |
+
INFO 01-04 13:18:32 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 49 |
+
INFO 01-04 13:18:32 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8007
|
| 50 |
+
INFO 01-04 13:18:32 [launcher.py:28] Available routes are:
|
| 51 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
|
| 52 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /docs, Methods: HEAD, GET
|
| 53 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
|
| 54 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
|
| 55 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /health, Methods: GET
|
| 56 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /load, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /ping, Methods: POST, GET
|
| 58 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 61 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /version, Methods: GET
|
| 62 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /pooling, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /score, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /rerank, Methods: POST
|
| 70 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 71 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 72 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /invocations, Methods: POST
|
| 73 |
+
INFO 01-04 13:18:32 [launcher.py:36] Route: /metrics, Methods: GET
|
| 74 |
+
INFO: Started server process [1318205]
|
| 75 |
+
INFO: Waiting for application startup.
|
| 76 |
+
INFO: Application startup compleIINFINFO 01-04 14:00:02 [loggers.py:111] Engine 000: Avg prompt throughput: 111.2 tokens/s, Avg generation throughput: 85.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 0INFOINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 77 |
+
INFO 01-04 14:00:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.1 tokens/s, Running: 1 reqs, WIINFO 01-04 14:00:19 [loggers.py:111] Engine 000: Avg prompt througINFINFO 01-04 14:00:22 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, WaINFO 01-04 14:00:29 [loggers.py:111] Engine 000: Avg prompt throughININFO 01-04 14:00:32 [loggers.py:111] Engine 000: Avg prompt throughput: 65.5 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit INFO 01-04 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OININFO 01-04 14:00:42 [loggers.py:111] Engine 000: Avg prompt throughput: 63.3 tokens/s, Avg generation throughput: 100.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hiINFO: INFO 01-04 14:00:47 [loggers.py:111] Engine 000: Avg prompt INFO 01-04INFO 01-04 14:00:52 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 1 reqs, WINFO 01-04 14:00:57 [loggers.py:111] Engine 000: Avg prompt tINFO 01-0INFO 01-04 14:01:02 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:01:02 [loggers.py:111] Engine 000: Avg prompt throughput: 89.3 tokens/s, Avg generation throughput: 34.6 tokens/s, Running: ININFOINFO 01-04 14:01:09 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFINFO 01-04 14:01:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 104.2 tokens/s, Running:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 INFINFO 0INFO INFO 01-04 14:01:22 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hiINFOINFO INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 78 |
+
INFO 01-04 14:01:32 [loggers.py:111] Engine 000: Avg prompt throughput: 87.2 tokens/s, Avg generation throughput: 92.6 tokens/s, RunningINFO 01-04 14:01:36 [loggers.py:111] Engine 000: Avg prompt throughput: 84.INFO 01-04 14:01:37 [loggers.py:111] Engine 000: Avg prompt throughINFINFO 01-04 14:01:42 [loggers.py:111] Engine 000: Avg prompt throughput: 58.0 tokens/s, Avg generation throughput: 73.8 tokens/s, Running:INFO 01-04 14:01:46 [loggers.py:111] Engine 000: Avg prompt throughput:INFOINFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg prompt throughpINFINFO 01-04 14:01:52 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 79 |
+
INFO: INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:02:02 [loggers.py:111] Engine 000: Avg prompt throughput: 103.4 tokens/s, Avg generation throughput: 73.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hitINFO: INFINFO 01-04 14:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 90.8 tokens/s, Avg generation throughput: 131.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 1.1%
|
| 80 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTINFOINFO 01-04INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 115.3 tokens/s, Avg generation throughput: 90.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit ratINFO 01IINFINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt thIINFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 90.0 tokens/s, Avg generation throughput: 69.5 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:02:42 [loggers.py:111] Engine 000: Avg prompt throughput: 123.1 tokens/s, Avg generation throughput: 110.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 6.2INFO: 10.46.17.192:0 - "POST /v1/completions HTTPININFO 01-04 INFINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-04 14:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 116.6 tokens/s, Avg generation throughput: 96.2 tokens/s, Running: INFO 01-04 14:02INFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 81 |
+
INFO 01-04 14:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 79.3 tokens/s, Running: 1 INFO 01-04 14:03:06 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 ININFO 01-04 14:03:12 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 91.INFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg prompt throughput: 102.ININFO 01-04 14:03:22 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 INFO 01-04 14:03:26 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:03:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 toINFO 01-04 14:03:32 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:03:33 [loggers.py:111] Engine 000: Avg prompt throughput: 144INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 82 |
+
INFO 01-04 14:03:36 [loggers.py:111] Engine 000: Avg prompt throughput: 144.7 tINFO 01-04 14:03:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.6 tokens/s, Running: 2 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 83 |
+
INFO INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 84 |
+
INFO 0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO 01-04 14:03:53 [loggers.py:111] Engine 000: Avg prompt throughput: 138INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 86 |
+
INFO 01-04 14:03:56 [loggers.py:111] Engine 000: Avg prompt throughput: 174.INFO:INFO 01-04 14:04:02 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg prompt throughput: 100.1 tokens/s, Avg generation throughput: 76.0 tokens/s, Running: 1 reqINFO 01-04 14:04:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:04:13 [loggers.py:111] Engine 000: Avg prompt throughput: 131.2 tokens/s, Avg generation throughput: 56.6 tokens/s, Running: 1INFO 01-04 14:04:16 [loggers.py:111] Engine 000: Avg prompt throughput: 109INFO 01-04 14INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 2INFO 01-04 14:04:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.8 tokens/s, Running: 2 reqINFINFO 01-04 14:04:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 87 |
+
INFO 01-04 14:04:36 [loggers.py:111] Engine 000: Avg prompt throughput: 132.3 tokens/s, Avg generation throughput: 124.6 tokens/s, Running: 2 rININFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt throughput: 177.2 tokens/s, Avg generation throughput: 48.3 tokens/s, Running:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 88 |
+
INFO 01-04 1IINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 89 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 90 |
+
INFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throughput: 184.1INFO 01-04 14:04:56 [loggers.py:111] Engine 000: Avg prompt throughput: 145.4 tokens/s, Avg generation throughput: 50.6 tokens/s, Running: 2 INFO 01-04INFO 01-04 14:05:02 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:05:06 [loggers.py:111] Engine 000: Avg prompt throughput: 217.4 tokens/s, Avg generation throughput: 98.8 tokens/s, Running: 3 INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 91 |
+
INFO 01-04 14:05:13 [loggers.py:111] Engine 000: Avg prompt throughput: 182.3INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 92 |
+
INFO 01-04 14:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 174.4 tokens/s, Avg generation throughput: 119.1 tokens/s, Running: 2 rINFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompt throughput: 186.7 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1INFO: 10INFO 01-04 14:05:25 [loggers.py:111] Engine 000: Avg pINFO 01-04 1INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 93 |
+
INFO 01-04 14:05:33 [loggers.py:111] Engine 000: Avg prompt throughput: 207.8 tokens/s, Avg generation throughput: 66.2 tokens/s, Running: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO 01-04 1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 95 |
+
INFO 01-04 14:05:43 [loggers.py:111] Engine 000: Avg prompt throughput: 216.8 tokens/s, Avg generation throughput: 61.3 tokens/s, Running: 1 rINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 96 |
+
INFO 01-04INFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt throughput: 236.5 tokens/s, Avg generation throughput: 69.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rINFO 01-04INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 97 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 98 |
+
INFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 247.4 tokens/s, Avg generation throughput: 75.4 tokens/s, Running: 1 reqs, WaitinINFO 01-04 14:06:05 [loggers.py:111] Engine 000: Avg pINFO 01-04 INFO: 1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:06:13 [loggers.py:111] Engine 000: Avg prompt throughput: 174.4 tokens/s, Avg generation throughput: 59.7 tokens/s, Running: 1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 99 |
+
INFO 01-04 14:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 100 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 101 |
+
INFO 01-04 14:06:23 [INFO: 10.46.17.192:0 - "POST /v1/completions HINFO 01-04 14:06:26 [loggers.py:111] Engine 000: Avg prompt throughput: 397.3 tokens/s, Avg generation throughput: 59.9 tokens/s, Running: 2 reqs, INFO 01-04 14:06:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 102 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 103 |
+
INFO 01-04 14:06:36 [loggers.py:111] Engine 000: Avg prompt throughput: 168.3 tokenINFO 01-04 14:06:43 [loggers.py:111] Engine 000: Avg prompt throughput: 245.7 tokens/s, INFO 01-04 14:06:45 [loggers.py:111] Engine 000INFO 01-04 14:06:46 [loggers.py:111] Engine 000: Avg prompt throughput: 227.7 tokensINFO: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 20INFO:INFO 01-04 14:06:52 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:06:53 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:06:56 [loggers.py:111] Engine 000: Avg prompt throughput: 285.1 tokens/s, AvINFO 01-04 14:07:02 [loggers.py:111] Engine 000: Avg prompt throughput: 178.4 tokens/s, Avg generation throughput: 126.1 tokens/sINFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:07:06 [loggerINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 14:07:13 [loggers.py:111] Engine 000: Avg prompt throughput: 213.5 tokens/s, Avg generation throughput: 12.2 tokens/s, INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 104 |
+
INFO 01-04 14:07:16 [loggINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput: 244.0 tokens/s, Avg generation throughput: 20.6 tokens/s,INFO 01-04 14:07:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:07:33 [loggers.py:111] Engine 000: Avg prompt throughput: 210.2 tokens/s, Avg generation throughput: 70.1 tokensINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 106 |
+
INFO 01-04 14:07:36 [loggers.py:111] Engine 000: Avg prompt throughput: 504.4 tokens/s, Avg geINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 107 |
+
INFO 01-04 14:07:43 [loggers.py:111] Engine 000: Avg prompt throughput: 274.2 tokens/s, Avg generation throughput: 65.4 tokeINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 108 |
+
INFO 01-04 14:07:46 [loggers.INFO 01-04 14:07:53 [loggers.py:111] Engine 000: Avg prompt throughput: 247.1 tokens/s, Avg generation throughput: 79.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 13.0%
|
| 109 |
+
INFO: 10.46.17.192:0 - "POST /v1INFO 01-04 14:07:56 [loggers.pINFO 01-04 14:08:03 [loggers.py:111] Engine 000: Avg prompt throughput: 368.9 tokens/s, Avg generation throughput: 57.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%INFO: 10.46.17.192:0 - "POSINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 110 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 111 |
+
INFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.4 tokenINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 112 |
+
INFO 01-04 14:08:16 [loggersINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO 01-04 14:08:23 [loggers.py:111] Engine 000: Avg prompt throughput: 596.5 tokens/s, Avg generation throughput: 45.5 tokeINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 114 |
+
INFO 01-04 14:08:26 [loggers.pyINFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%,INFO 01-04 14:08:36 [loggers.py:111] Engine 000: Avg prompt throughput: 405.1 tokens/s, Avg geneINFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 305.0 tINFO 01-04 14:08:42 [loggers.py:111] Engine 000: Avg prompt throughput: 281.7 tokens/s, Avg generation throughput: 77.6 tokens/s, Running: 1INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 115 |
+
INFO 01-04 14:08:53 [loggers.py:111] Engine 000: Avg prompt throughput: 318.9 tokens/s, Avg generation throughput: 78.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%,INFO: 10.43.30.4:0 - "POSTINFO: 10.45.190.192:0 - "POST INFO 01-04 14:08:56 [loggers.py:1INFO 01-04 14:09:03 [loggers.py:111] Engine 000: Avg prompt throughput: 235.9 tokens/s, Avg generation throughput: 56.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO 01-04 14:09:06 [loggers.py:11INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 116 |
+
INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 435.6 tokens/s, Avg generation throughput: 70.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg prompt throughput: 186.5 tokens/s, Avg generatiINFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 333.5 tokens/s, Avg generation throughput: 15.INFO 01-04 14:09:26 [loggers.py:111] Engine 000: Avg prompt throughput: 240.7 tokens/s, Avg generatioINFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 391.4 tokens/s, Avg generation throughput: 58INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 117 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 118 |
+
INFO 01-04 14:09:36 [loggers.py:111] EngiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 119 |
+
INFO 01-04 14:09:43 [loggers.py:111] Engine 000: Avg prompt throughput: 360.5 tokens/s, Avg generation throuINFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 673.4 tokens/s, Avg generation throughpINFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 263.7 tokens/s, Avg generation throughput: 47.8 tokens/s, Running:INFO 01-04 14:09:52 [loggers.py:111INFO: 10.45.190.192:0 - "POST /v1/completiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 120 |
+
INFO 01-04 14:10:03 [loggers.py:111] Engine 000: Avg prompt throughput: 377.1 tokens/s, Avg generation throughput: 82.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 21.2%
|
| 121 |
+
INFO: 10.45.1INFO 01-04 14:10:06 [loggers.py:111] Engine 000: AINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 122 |
+
INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 199.5 tokens/s, Avg generation tINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 123 |
+
INFO 01-04 14:10:16 [loggers.py:111] Engine 000:INFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 370.4 tokens/s, Avg generation throughput: 74.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 22.0%
|
| 124 |
+
INFO: 10.46.5INFO 01-04 14:10:26 [loggers.py:111] Engine 000: INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 125 |
+
INFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:10:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 126 |
+
INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 376.7 tokens/s, Avg generation INFO 01-04 14:10:46 [loggers.py:111] Engine 000: Avg prompt throughput: 138.2 tokens/s, Avg generation throughput: 2INFO 01-04 14:10:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:10:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4INFO 01-04 14:11:03 [loggers.py:111] Engine 000: Avg prompt throughput: 245.0 tokens/s, Avg generation throughput: 33.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:11:06 [loggers.py:111] Engine 000: Avg INFO 01-04 14:11:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 20.8%
|
| 127 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 128 |
+
INFO 01-04 14:11:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 23.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, INFO 01-04 14:11:26 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:11:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RuINFO 01-04 14:11:46 [loggers.pyINFO 01-04 14:11:46 [loggers.py:111] Engine 000: Avg prompt throughput: 303.1 tokens/s, Avg generation throughput: 26.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 129 |
+
INFO 01-04 14:11:56 [loggerINFO 01-04 14:11:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 9.6 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OINFO 01-04 14:12:06 [loggINFO 01-04 14:12:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, PrefINFO 01-04 14:12:16 [logINFO 01-04 14:12:16 [loggers.py:111] Engine 000: Avg prompt throughput: 424.1 tokens/s, Avg generation throughput: 31.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, PrefINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 130 |
+
INFO 01-04 14:12:26 [loggeINFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prompt throughput: 757.5 tokens/s, Avg generation throughput: 80.4 tokens/s, RuINFO 01-04 14:12:32 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt throughput: 400.8 tokens/s, Avg INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 131 |
+
INFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt INFO: INFO 01-04 14:12:42 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 319.9 tokens/s, AINFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:12:52 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:12:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:12:56 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:13:02 [loggers.py:111] Engine 000: Avg prompt throughput: 260.8 tokens/s, Avg generation throughput: 58.2 tokeINFO 01-04 14:13:06 [INFO 01-04 14:13:06 [loggers.py:111] Engine 000: Avg prompt throughput: 406.1 tokens/s, Avg generation throughput: 49.6 tokens/s, Running: 2 INFO 01-04 14:13:12 [loggers.py:111] Engine 000: Avg prompt throughput: 260.2INFO 01-04 14:13:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rateINFO 01-04 14:13:22 [loggers.py:111] Engine 000: Avg pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 132 |
+
INFO 01-04 14:13:INFO 01-04 14:13:26 [loggers.py:111] Engine 000: Avg prompt throughput: 187.4 toINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 133 |
+
INFO 01-04 14:13:32 [loggers.py:111] Engine 000: Avg INFO 01-04 14:13:36INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 134 |
+
INFO 01-04 14:13:36 [loggers.py:111] Engine 000: Avg prompt throughput: 645.0 tokINFO 01-04 14:13:42 [loggers.py:111] Engine 000: AvgINFO 01-04 14:13:46 [loggers.py:111] Engine 000: Avg prompt throughput: 186.9 tokens/s, Avg generation throughput: 76.2 tokens/s, Running: 2 reqs, WaitINFO: 10.46.INFO 01-04 14:13:52 [loggers.py:111] Engine 000: AvgINFO 01-04 14:13:56 [loggers.py:111] Engine 000: Avg prompt throughput: 350.4 tokens/s, Avg generation throughput: 118.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPINFO 01-04 14:14:02 [loggers.py:111] Engine 000: Avg INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 135 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 136 |
+
INFO: 10.46.50.192:0 - "POST /v1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 137 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 138 |
+
INFO 01-04 14:14:12 [loggers.py:111] Engine 000: Avg prompt throughput: 354.7 tokens/s, Avg generation throughput: 159.INFO 01-04 14:14:16 [loggers.py:111] Engine 000: Avg prompt throughput: 367.6 tokINFO 01-04 14:14:16 [loggers.py:111] Engine 000: Avg prompt throughput: 797.4 tokens/INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 139 |
+
INFO 01-04 14:14:22 [loggers.py:111] Engine 000: AvgINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 140 |
+
INFO 01-04 14:14:26 [loggers.py:111] Engine 000: Avg prompt throughput: 309.4 tokens/s, Avg generation throughput: 64.5 tokens/s, Running: 1 reINFO 01-04 14:14:26 [loINFO: 10.46.50.192:0 - "POST /v1/completions HTTPINFO 01-04 14:14:36 [loggers.py:111] Engine 000: Avg prompt throughput: 372.2 tokens/s, Avg generation throughput: 90.5 tokens/s, Running: 2 rINFO 01-04 14:14:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO: 10INFO 01-04 14:14:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokeINFO 01-04 14:14:46 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:14:46 [lINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 141 |
+
INFO 01-04 14:14:52 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:14:56 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:14:56 [loggers.py:111] Engine 000: Avg prompt throughput: 1015.6 tokens/sINFO 01-04 14:15:02 [loggers.py:111] Engine 000: Avg prompt throughput: 346.2 tokens/s, Avg generation throughput: 108.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, INFO 01-04 14:15:06 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:15:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:15:12 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:15:16 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:15:16 [loggers.py:111] Engine 000: Avg prompt throughput: 905.5 tokens/s, Avg generation throughput: 88.9 tokens/s, Running: 3 reqs, INFO: 1INFO 01-04 14:15:22 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:15:26 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 142 |
+
INFO 01-04 14:15:26 [loggers.py:111] Engine 000: Avg prompt throughput: 227.3 tokenINFO: INFO 01-04 14:15:32 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.6 tokens/s, RunningINFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 372.0 tokeINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 143 |
+
INFO 01-04 14:15:46 [loggers.py:111] Engine 000: Avg prompt throughput: 422.2 tokens/s, Avg generation throughput: 52.1 tokens/s, RunniINFO 01-04 14:15:46 [loggers.py:111] Engine 000: Avg prompt throughput: 430.3 tokensINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 144 |
+
INFO 01-04 14:15:56 [loggers.py:111] Engine 000: Avg prompt throughput: 471.9 tokens/s, Avg generation throughput: 66.6 tokens/s, Running: 1 reqs, Waiting: 0 INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 145 |
+
INFO 01-04 14:16:06 [loggers.py:111] Engine 000: Avg prompt throughput: 286.2 tokens/s, Avg generation throughput: 78.7 tokens/s, RunnINFO 01-04 14:16:06 [loggINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 14:16:16 [loggers.py:111] Engine 000: Avg prompt throughput: 508.3 tokens/s, Avg generation throughput: 75.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cINFO 01-04 14:16:16 [INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 146 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 147 |
+
INFO 01-04 14:16:26 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:16:26 [loggers.py:111] Engine 000: Avg prompt throughput: 454.6 tokens/s, Avg generation throughput: 114.2 tokens/s, Running: 3 reqs, WINFO: INFO 01-04 14:16:32 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:16:36 [loggers.py:111] Engine 000: Avg prompt throughput: 510.4 tokens/s, Avg generation throughput: 51.3 tokens/s, RunningINFO 01-04 14:16:36 [logINFO 01-04 14:16:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokeINFO 01-04 14:16:46 [loggers.py:111] Engine 000: Avg prompt throughput: 534.8 tokens/s, Avg generation throughput: 38.8 tokens/s, Running: 1 reqs, Waiting: 0 reqsINFO 01-04 14:16:52 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:16:56 [loggers.py:111] Engine 000: Avg prompt throughput: 561.4 tokens/s, Avg generation throughput: 50.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 15.6%
|
| 148 |
+
INFO: INFO 01-04 14:17:02 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:17:06 [loggers.py:111] Engine 000: Avg prompt throughput: 556.6 tokens/s, Avg generation throughput: 83.4 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 149 |
+
INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 648.5 tokens/s, Avg generation throughput: 93.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cacheINFO 01-04 14:17:INFO: INFO 01-04 14:17:22 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 525.0 tokens/s, Avg generation throughput: 81.9 tokens/s, RunINFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 724.7 tokens/sINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 150 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 151 |
+
INFO 01-04 14:17:36 [loggers.py:111] Engine 000: Avg prompt throughput: 236.9 tokens/s, Avg generation throughput: 59.5 tokens/s, RunnINFO 01-04 14:17:36 [loggers.py:111] Engine 000: Avg prompt throughput: 389.6 tokens/INFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, RunninINFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 882.6 tokenINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 152 |
+
INFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 583.1 tokens/s, Avg generation throughput: 59.7 tokens/s, Running: INFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokeINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO 01-04 14:18:06 [loggers.py:111] Engine 000: Avg prompt throughput: 516.2 tokens/s, Avg generation throughput: 74.6 tokens/s, RunninINFO 01-04 14:18:06 [logINFO 01-04 14:18:12 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 388.1 tokens/s, Avg generation throughput: 108.3 tokens/s, Running: 3 reqs, Waiting: 0 rINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 154 |
+
INFO 01-04 14:18:22 [loggers.py:111] Engine 000: Avg prompt thINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 155 |
+
INFO 01-04 14:18:26 [loggers.py:111] Engine 000: Avg prompt throughput: 305.7 tokens/s, Avg generation throughput: 93.2 tokens/s, Running: 2 reqs, Waiting: 0 rINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200INFO 01-04 14:18:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hitINFO 01-04 14INFO: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 156 |
+
INFO 01-04 14:18:46 [loggers.py:111] Engine 000: Avg prompt throughput: 945.5 tokens/s, Avg generation throughput: 112.7 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:18:52 [loggers.py:111] Engine 000: Avg prompt throughput: 192.7 tokens/s, Avg generation throughput: 63.7 tokenINFO 01-04 14:18:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 1 reqs, Waiting: 0 reINFO 01-04 14:19:02 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:19:06 [loggers.py:111] Engine 000: Avg prompt throughput: 605.3 tokens/s, Avg generation throughput: 83.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 17.7%
|
| 157 |
+
INFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 734.9 tokens/s, Avg generation throughput: 103.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.6%, Prefix cache hit rate: 17.2%
|
| 158 |
+
INFO 01-04 14:19:26 [loggers.py:111] Engine 000: Avg prompt throughput: 665.1 tokens/s, Avg generation throughput: 129.9 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.7%, Prefix cache hit rate: 16.8%
|
| 159 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 160 |
+
INFO 01-04 14:19:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 151.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.1%, Prefix cache hit rate: 16.8%
|
| 161 |
+
INFO: INFO 01-04 14:19:42 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1336.2 tokens/s, Avg generation throughput: 130.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 11.0%, Prefix cache hit rate: 18.3%
|
| 162 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 163 |
+
INFOINFO 01-04 14:19:52 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:19:56 [loggers.py:111] Engine 000: Avg prompt throughput: 443.6 tokens/s, Avg generation throughput: 117.0 tokens/s, Running: 3 reqs, WaitingINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OININFO 01-04 14:20:02 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput: 542.3 tokens/s, Avg generation throughput: 106.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.9%, Prefix cache hitINFO 01-04 14IINFO 01-04 14:20:12 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 610.3 tokens/s, Avg generation throughput: 114.6 tokens/s, Running: 3 reqs, WaitinINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 164 |
+
INFO 01-04 14:20:22 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 165 |
+
INFO 01-04 14:20:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO 01-04 14:20:32 [loggers.py:111] Engine 000: Avg prompt throughput: 778.7 tokens/s, Avg generation throughput: 60.1 tokens/INFO 01-04 14:20:36 [loggers.py:111] Engine 000: Avg prompt throughput: 1211.9 tokens/s, Avg generation throughput: 69.9 tokens/s, Running: 3 reqs, Waiting:INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 166 |
+
INFO 01-04 14:20:42 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 167 |
+
INFO 01-04 14:20:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:2INFO 01-04 14:20:52 [loggers.py:111] Engine 000: Avg prompt throughput: 664.6 tokens/s, Avg generation throughput: 58.0 tokens/s, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 168 |
+
INFO 01-04 14:20:56 [loggers.py:111] Engine 000: Avg prompt throughput: 568.1 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 169 |
+
INFO 01-04 14:21:02 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:21:06 [loggers.py:111] Engine 000: Avg prompt throughput: 353.6 tokens/s, Avg generation throughput: 71.0 tokens/s, Running: INFO 01-04 1INFO 01-04 14:21:12 [loggers.py:111] Engine 000: Avg prompt throughput: 610.5 tokens/s, Avg generation throughput: 28.6 tokens/s, RunnINFO 01-04 14:21:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO: 1INFO 01-04 14:21:22 [loggers.py:111] Engine 000: Avg prompt throughput: 632.5 tokens/s, Avg generation throughput: 69.1 tokens/s, RuINFO 01-04 14:21:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1078.0 tokens/s, Avg generation throughput: 91.3 tokens/s, Running: 3 reqs, WaitiINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 170 |
+
INFO 01-04 14:21:32 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
INFO 01-04 14:21:36 [loggers.py:111] Engine 000: Avg prompt throughput: 444.3 tokens/sINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 172 |
+
INFO 01-04 14:21:42 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:21:46 [loggers.py:111] Engine 000: Avg prompt throughput: 616.4 tokens/s, Avg generation throughput: 87.4 tokens/s, Running: 3 reqs, WaINFO 01-04 14:21:52 [loggers.py:111] Engine 000: Avg prompt throughput: 477.0 tokens/s, Avg generation throughput: 50.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 15.5%
|
| 173 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 174 |
+
INFO 01-04 14:22:02 [loggers.py:111] Engine 000: Avg prompt throughput: 818.0 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 15.2%
|
| 175 |
+
INFO 01-04 14:22:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.5%, Prefix cache hit rate: 15.2%
|
| 176 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 177 |
+
INFO 01-04 14:22:22 [loggers.py:111] Engine 000: Avg prompt throughput: 302.9 tokens/s, Avg generation throughput: 91.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 15.0%
|
| 178 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:22:32 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 55.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hitINFO 01-04 14INFO 01-04 14:22:42 [loggers.py:111] Engine 000: Avg prompt throughput: 846.2 tokens/s, Avg generation throughput: 76.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 14.7%
|
| 179 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 180 |
+
INFO 01-04 14:22:52 [loggers.py:111] Engine 000: Avg prompt throughput: 737.4 tokens/s, Avg generation throughput: 73.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hINFO 01-04 14:22:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0INFO 0INFO 01-04 14:23:02 [loggers.py:111] Engine 000: Avg prompt throughput: 610.5 tokens/s, Avg generation throughput: 33.9 tokens/s, RunninINFO 01-04 14:23:06 [loggers.py:111] Engine 000: Avg prompt throughput: 437.0INFO 0INFO 01-04 14:23:12 [loggers.py:111] Engine 000: Avg prompt throughput: 462.1 tokens/s, Avg generation throughput: 60.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cacheINFO 01-04INFO 01-04 14:23:16 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:23:22 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 56.1 tokens/s, Running:INFO 01-04 14:23:26 [loggers.py:111] Engine 000: Avg prompt throughput: 820.2 tokens/s, Avg generation throughput: 18.6 tokens/s, Running: 1 reqINFINFO 01-04 14:23:32 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:23:36 [loggers.py:111] Engine 000: Avg prompt throughput: 706.9 tokens/s, Avg generation throughput: 61.3 tokens/s, Running: 2 reqs, INFO 01-04 14:23:42 [loggers.py:111] Engine 000: Avg prompt throughput: 827.2 tokens/s, Avg generation throughput: 62.7 tokens/s, RunninINFO 01-04 14:23:46 [loggers.py:111] Engine 000: Avg prompt throughput: 702.2 tokenINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 181 |
+
INFO 01-04 14:23:52 [loggers.py:111] Engine 000: Avg prompt throughput: 886.0 tokens/s, Avg generation throughput: 64.8 tokens/s, RunnINFO 01-04 14:23:56 [loggers.py:111] Engine 000: Avg prompt throughput: 677.2 tokens/INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 182 |
+
INFO 01-04 14:24:02 [loggers.py:111] Engine 000: Avg prompt throughput: 616.8 tokens/s, Avg generation throughput: 86.3 tokens/s, RuINFO 01-04 14:24:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 1INFO INFO 01-04 14:24:16 [loggers.py:111] Engine 000: Avg prompt throughput: 617.8 tokens/s, Avg generation throughput: 76.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 1INFO IINFO 01-04 14:24:22 [loggers.py:111] Engine 000: Avg prompt throuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO 01-04 14:24:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:24:32 [loggers.py:111] Engine 000: Avg prompt throughput: 735.9 tokens/s, Avg generation throughput: 66.0 tokens/s, RINFO 01-04 14:24:36 [loggers.py:111] Engine 000: Avg prompt throughput: 931.7 tokens/s, INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 184 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 185 |
+
IINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 186 |
+
INFO 01-04 14:24:46 [loggers.py:111] Engine 000: Avg prompt throughput: 700.4 tokens/s, Avg generation throughput: 50.5 tokens/s, Running: 1 reqs, WaINFO 01-04 14:24:52 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:24:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:25:02 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, RunningINFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 744.9 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 187 |
+
INFO 01-04 14:25:12 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1012.7 tokens/s, Avg generation throughput: 63.6 tokens/s, Running: 2 reqs, INFO 01-04 14:25:22 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:25:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.6%, Prefix cache hit rate: 17INFOINFO 01-04 14:25:36 [loggers.py:111] Engine 000: Avg prompt throughput: 1362.4 tokens/s, Avg generation throughput: 119.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 13.4%, Prefix cache hit rate:INFO 01-04 14:25:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 rINFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 188 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 189 |
+
INFO 01-04 14INFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 559.5 tokens/s, Avg generation throughput: 107.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.8%, Prefix cache hiINFO 01-04 14:26:02 [loggers.py:111] Engine 000: Avg prompt throughput: 777.3 tokens/s, Avg generation throughput: 58.6 tokens/s, Running: 2 reqs, INFO 01-04 14:26:06 [loggers.py:111] Engine 000: Avg prompt throughput: INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 190 |
+
INFO 01-04 14:26:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO 01-04 14:26:16 [loggers.py:111] Engine 000: Avg prompt throughput: 872.4 tokens/s, Avg generation throughput: 66.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.6%, Prefix cache INFO 01-04 14:26INFO 01-04 14:26:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 88.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.0%, Prefix cacheINFO 01-04 14:26:32 [loggers.py:111] Engine 000: Avg prompt throughput: 806.4 tokensINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 191 |
+
INFO 01-04 14:26:36 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 192 |
+
INFO 01-04 14:26:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:26:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, RunINFO 01-04 14:26:52 [loggers.py:111] Engine 000: Avg prompt throughput: 355.6 tokens/sINFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 741.0 tokens/s, Avg generation throughput: 54.7 tokens/s, RunINFO 01-04 14:27:02 [loggers.py:111] Engine 000: Avg prompt throughput: 874.8 tokens/sINFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.8%, Prefix cache hit rate: 19.9%
|
| 193 |
+
INFO: 10.45.190.192:0 - "POST /v1/completiINFO 01-04 14:27:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 194 |
+
INFO 01-04 14:27:16 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:27:22 [loggers.py:111] Engine 000: Avg prompt throughput: 818.3 tokens/s, Avg generation throughput: 22.2 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:27:26 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:27:32 [loggers.py:111] Engine 000: Avg prompt throughput: 627.7 tokens/s, Avg generation throughput: 58.7 tokens/s, Running: 2 reqs, Waiting: INFO 01-04 14:27:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, RuINFO 01-04 14:27:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.6 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:27:52 [loggers.py:111] Engine 000: Avg prompt throughput: 842.7 tokens/s, Avg generation throughput: 84.9 tokens/s, Running: 2 reqs, Waiting:INFO 01-04 14:27:56 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 195 |
+
INFO 01-04 14:28:02 [loggers.py:111] Engine 000: Avg prompt throughput: 771.6 tokens/s, Avg generation throughput: 84.7 tokens/s, Running: 2 reqs, WaitiINFO 01-04 14:28:06 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 196 |
+
INFO 01-04 14:28:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO 01-04 14:28:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 44.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, PrefiINFO 01-04 14:28:22 [loggers.py:111] Engine 000: Avg prompt throughput: 355.3 tokens/s, INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 619.1 tokens/s, Avg generation throughput: 58.5 tokens/s, RINFO 01-04 14:28:32 [loggers.py:111] Engine 000: Avg prompt throughput: 799.2 tokens/s, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 197 |
+
INFO 01-04 14:28:36 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 198 |
+
INFO 01-04 14:28:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:28:52 [loggers.py:111] Engine 000: Avg prompt throughput: 551.0 tokens/s, AvgINFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, RuINFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 199 |
+
INFO 01-04 14:29:02 [INFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.6 tokens/s, RunniINFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:29:12 [INFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunninINFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:29:22 [INFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 714.5 tokens/s, Avg generation throughput: 24.3 tokens/s, RunINFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:29:32 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AINFO 01-04 14:29:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 23.1 tokens/s, RuINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 200 |
+
INFO 01-04 14:29:42 [INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunninINFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:29:52 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 201 |
+
INFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 902.4 tokens/s, Avg generation throughput: 27.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.2%
|
| 202 |
+
INFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.2%
|
| 203 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 204 |
+
INFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 865.1 tokens/s, Avg generation throughput: 23.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
|
| 205 |
+
INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
|
| 206 |
+
INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 895.3 tokens/s, Avg generation throughput: 44.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 18.6%
|
| 207 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 208 |
+
INFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 25.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
|
| 209 |
+
INFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
|
| 210 |
+
INFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 647.1 tokens/s, Avg generation throughput: 23.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 18.4%
|
| 211 |
+
INFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 18.4%
|
| 212 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 213 |
+
INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
|
| 214 |
+
INFO 01-04 14:32:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
|
| 215 |
+
INFO 01-04 14:32:26 [loggers.py:111] Engine 000: Avg prompt throughput: 633.5 tokens/s, Avg generation throughput: 20.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 18.2%
|
| 216 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 217 |
+
INFO 01-04 14:32:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
|
| 218 |
+
INFO 01-04 14:32:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
|
| 219 |
+
INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 468.8 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 18.0%
|
| 220 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 221 |
+
INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
|
| 222 |
+
INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
|
| 223 |
+
INFO 01-04 14:33:46 [loggers.py:111] Engine 000: Avg prompt throughput: 709.4 tokens/s, Avg generation throughput: 15.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 17.8%
|
| 224 |
+
INFO 01-04 14:33:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.8%
|
| 225 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 226 |
+
INFO 01-04 14:34:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 21.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
|
| 227 |
+
INFO 01-04 14:34:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
|
| 228 |
+
INFO 01-04 14:35:06 [loggers.py:111] Engine 000: Avg prompt throughput: 415.5 tokens/s, Avg generation throughput: 18.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 17.7%
|
| 229 |
+
INFO 01-04 14:35:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 17.7%
|
| 230 |
+
INFO 01-04 14:35:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 17.7%
|
| 231 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 232 |
+
INFO 01-04 14:35:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
|
| 233 |
+
INFO 01-04 14:35:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
|
| 234 |
+
INFO 01-04 14:36:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1200.1 tokens/s, Avg generation throughput: 17.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 17.3%
|
| 235 |
+
INFO 01-04 14:36:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 17.3%
|
| 236 |
+
INFO 01-04 14:36:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 17.3%
|
| 237 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 238 |
+
INFO 01-04 14:36:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 16.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.3%
|
| 239 |
+
INFO 01-04 14:36:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.3%
|
| 240 |
+
INFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1476.1 tokens/s, Avg genINFO 01-04 14:37:15 [loggers.py:111] Engine 000: Avg prompt throughput: 995.5 tokens/s, Avg generation throughput: 3.7 tokenINFO 01-04 14:37:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geneINFO 01-04 14:37:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4INFO 01-04 14:38:06 [loggers.py:111] Engine 000: Avg prompt throughput: 834.8 tokens/s, Avg generation throughput: 6.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 14.9%
|
| 241 |
+
INFO 01-04 14:38:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 14.9%
|
| 242 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 243 |
+
INFO 01-04 14:38:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatioINFO 01-04 14:39:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1156.2 tokens/s, Avg generation throughput: 2.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 17.7%
|
| 244 |
+
INFO 01-04 14:39:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46INFO 01-04 14:39:56 [loggers.py:111] Engine 000: Avg prompt throughput: 666.3 tokens/s, Avg generation throughput: 37.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 15.6%
|
| 245 |
+
INFO 01-04 14:40:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 15.6%
|
| 246 |
+
INFO 01-04 14:40:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:41:35 [loggers.py:111] Engine 000: Avg prompt throughput: 626.1 tokens/s, Avg generation throughput: 13.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.5%
|
| 247 |
+
INFO 01-04 14:41:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 17.5%
|
| 248 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 249 |
+
INFO 01-04 14:41:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.5%
|
| 250 |
+
INFO 01-04 14:42:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokenINFO 01-04 14:43:16 [loggers.py:111] Engine 000: Avg prompt throughput: 719.8 tokens/s, Avg generation throughput: 5.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 17.5%
|
| 251 |
+
INFO 01-04 14:43:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.5%
|
| 252 |
+
INFO 01-04 14:43:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 17.5%
|
| 253 |
+
INFO 01-04 14:43:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqsINFO 01-04 14:45:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1882.9 tokens/s, Avg generation throughput: 35.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.5%, Prefix cache hit rate: 16.9%
|
| 254 |
+
INFO 01-04 14:45:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.7%, Prefix cache hit rate: 16.9%
|
| 255 |
+
INFO 01-04 14:45:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.9%, Prefix cache hit rate: 16.9%
|
| 256 |
+
INFO 01-04 14:45:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.5 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:46:09 [loggers.py:111] Engine 000: Avg prompt throughput: 463.5 tokens/s, Avg generation throughput: 15.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.6%
|
| 257 |
+
INFO 01-04 14:46:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.6%
|
| 258 |
+
INFO 01-04 14:46:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.6%
|
| 259 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 260 |
+
INFO 01-04 14:46:39 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:48:45 [loggers.py:111] Engine 000:INFO 01-04 14:49:33 [loggers.py:111] Engine 000: Avg prompt throughput: 1074.6 tokens/s, Avg generation throughput: 45.6 tokens/INFO 01-04 14:59:23 [loggers.py:111] Engine 000: Avg prompt throughput: 471.3 tokens/s, Avg generation throughput: 1.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 13.3%
|
| 261 |
+
INFO 01-04 14:59:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 13.3%
|
| 262 |
+
INFO 01-04 14:59:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 13.3%
|
| 263 |
+
INFO 01-04 14:59:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO 01-04 15:10:35 [loggers.py:111] Engine 000: Avg prompt throughput: 561.0 tokens/s, Avg generation throughput: 8.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 17.1%
|
| 264 |
+
INFO 01-04 15:10:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.1%
|
| 265 |
+
INFO 01-04 15:10:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.1%
|
| 266 |
+
INFO 01-04 15:11:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 17.1%
|
| 267 |
+
INFO 01-04 15:11:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.1%
|
| 268 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 269 |
+
INFO 01-04 15:11:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.1%
|
| 270 |
+
INFO 01-04 15:11:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.1%
|
| 271 |
+
6.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 272 |
+
INFO 01-04 15:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.6%
|
| 273 |
+
INFO 01-04 15:04:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.6%
|
| 274 |
+
1-04 15:01:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 13.3%
|
| 275 |
+
INFO 01-04 15:01:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 13.3%
|
| 276 |
+
INFO 01-04 15:01:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 13.3%
|
| 277 |
+
INFO 01-04 15:02:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 13.3%
|
| 278 |
+
INFO 01-04 15:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 13.3%
|
| 279 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 280 |
+
INFO 01-04 15:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.3%
|
| 281 |
+
INFO 01-04 15:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.3%
|
| 282 |
+
] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
|
| 283 |
+
INFO 01-04 14:58:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
|
hf_ip/vllm_gpu7.log
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 2 |
+
warnings.warn(
|
| 3 |
+
INFO 01-04 13:13:45 [__init__.py:239] Automatically detected platform cuda.
|
| 4 |
+
INFO 01-04 13:13:48 [api_server.py:1043] vLLM API server version 0.8.5
|
| 5 |
+
INFO 01-04 13:13:48 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8008, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
|
| 6 |
+
INFO 01-04 13:13:56 [config.py:717] This model supports multiple tasks: {'classify', 'generate', 'score', 'reward', 'embed'}. Defaulting to 'generate'.
|
| 7 |
+
INFO 01-04 13:13:56 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
|
| 8 |
+
/usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
|
| 9 |
+
warnings.warn(
|
| 10 |
+
INFO 01-04 13:14:01 [__init__.py:239] Automatically detected platform cuda.
|
| 11 |
+
INFO 01-04 13:14:04 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
|
| 12 |
+
WARNING 01-04 13:14:05 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f5053f564d0>
|
| 13 |
+
INFO 01-04 13:14:06 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
|
| 14 |
+
INFO 01-04 13:14:06 [cuda.py:221] Using Flash Attention backend on V1 engine.
|
| 15 |
+
WARNING 01-04 13:14:06 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
|
| 16 |
+
INFO 01-04 13:14:06 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
INFO 01-04 13:15:40 [loader.py:458] Loading weights took 94.24 seconds
|
| 35 |
+
INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 94.501715 seconds
|
| 36 |
+
INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
|
| 37 |
+
INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.44 s
|
| 38 |
+
INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
|
| 39 |
+
INFO 01-04 13:16:49 [backends.py:148] Compiling a graph for general shape takes 53.17 s
|
| 40 |
+
INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.61 s in total
|
| 41 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
|
| 42 |
+
INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
|
| 43 |
+
INFO 01-04 13:18:33 [gpu_model_runner.py:1686] Graph capturing finished in 40 secs, took 1.21 GiB
|
| 44 |
+
INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.44 seconds
|
| 45 |
+
INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
|
| 46 |
+
WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
|
| 47 |
+
INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 48 |
+
INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
|
| 49 |
+
INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8008
|
| 50 |
+
INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
|
| 51 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: GET, HEAD
|
| 52 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: GET, HEAD
|
| 53 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: GET, HEAD
|
| 54 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: GET, HEAD
|
| 55 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
|
| 56 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
|
| 57 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: GET, POST
|
| 58 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
|
| 59 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
|
| 60 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
|
| 61 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
|
| 62 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
|
| 63 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
|
| 64 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
|
| 65 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
|
| 66 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
|
| 67 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
|
| 68 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
|
| 69 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
|
| 70 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
|
| 71 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
|
| 72 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
|
| 73 |
+
INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
|
| 74 |
+
INFO: Started server process [1319238]
|
| 75 |
+
INFO: Waiting for application startup.
|
| 76 |
+
INFO: Application startup compleIINFO 01-04 14:00:14 [loggers.py:111] Engine 000: Avg prompt throughput: 53.6 tokens/s, Avg generation throughput: 29.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.3%, Prefix cache hit rate: 0.0%
|
| 77 |
+
INFO 01-04 14:00:24 [loggers.py:111] Engine 000: Avg prompt throughput: 57.7 tokens/s, Avg generation throughput: 61.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 1.4%
|
| 78 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 79 |
+
INFO 01-04 14:00:34 [loggers.py:111] Engine 000: Avg prompt throughput: 58.8 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 1.9%
|
| 80 |
+
INFO: 10.45.190.192:0 - "GET /v1/models HTTP/1.1" 200 OK
|
| 81 |
+
INFO 01-04 14:00:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 1.9%
|
| 82 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 83 |
+
INFO 01-04 14:00:54 [loggers.py:111] Engine 000: Avg prompt throughput: 64.9 tokens/s, Avg generation throughput: 71.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 2.1%
|
| 84 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 85 |
+
INFO 01-04 14:01:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 2.1%
|
| 86 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 87 |
+
INFO 01-04 14:01:14 [loggers.py:111] Engine 000: Avg prompt throughput: 75.3 tokens/s, Avg generation throughput: 74.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 2.1%
|
| 88 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 89 |
+
INFO 01-04 14:01:24 [loggers.py:111] Engine 000: Avg prompt throughput: 71.1 tokens/s, Avg generation throughput: 50.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 2.1%
|
| 90 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 91 |
+
INFO 01-04 14:01:34 [loggers.py:111] Engine 000: Avg prompt throughput: 86.2 tokens/s, Avg generation throughput: 12.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 2.1%
|
| 92 |
+
INFO 01-04 14:01:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 2.1%
|
| 93 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 94 |
+
INFO 01-04 14:01:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 17.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 2.1%
|
| 95 |
+
INFO 01-04 14:02:04 [loggers.py:111] Engine 000: Avg prompt throughput: 90.2 tokens/s, Avg generation throughput: 33.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 2.0%
|
| 96 |
+
INFO 01-04 14:02:14 [loggers.py:111] Engine 000: Avg prompt throughput: 110.2 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 1.7%
|
| 97 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 98 |
+
INFO 01-04 14:02:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 1.7%
|
| 99 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 100 |
+
INFO 01-04 14:02:34 [loggers.py:111] Engine 000: Avg prompt throughput: 163.2 tokens/s, Avg generation throughput: 73.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 1.6%
|
| 101 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 102 |
+
INFO 01-04 14:02:44 [loggers.py:111] Engine 000: Avg prompt throughput: 80.9 tokens/s, Avg generation throughput: 107.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 1.4%
|
| 103 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 104 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 105 |
+
INFO 01-04 14:02:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 60.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 1.4%
|
| 106 |
+
INFO 01-04 14:03:04 [loggers.py:111] Engine 000: Avg prompt throughput: 110.1 tokens/s, Avg generation throughput: 5.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 1.3%
|
| 107 |
+
INFO 01-04 14:03:14 [loggers.py:111] Engine 000: Avg prompt throughput: 124.2 tokens/s, Avg generation throughput: 65.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 1.1%
|
| 108 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 109 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 110 |
+
INFO 01-04 14:03:24 [loggers.py:111] Engine 000: Avg prompt throughput: 132.8 tokens/s, Avg generation throughput: 85.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 1.1%
|
| 111 |
+
INFO 01-04 14:03:34 [loggers.py:111] Engine 000: Avg prompt throughput: 138.8 tokens/s, Avg generation throughput: 80.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 1.0%
|
| 112 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 113 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 114 |
+
INFO 01-04 14:03:44 [loggers.py:111] Engine 000: Avg prompt throughput: 90.8 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 1.1%
|
| 115 |
+
INFO 01-04 14:03:54 [loggers.py:111] Engine 000: Avg prompt throughput: 120.3 tokens/s, Avg generation throughput: 63.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 1.1%
|
| 116 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 117 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 118 |
+
INFO 01-04 14:04:04 [loggers.py:111] Engine 000: Avg prompt throughput: 119.8 tokens/s, Avg generation throughput: 80.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 5.9%
|
| 119 |
+
INFO 01-04 14:04:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rate: 5.9%
|
| 120 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 121 |
+
INFO 01-04 14:04:24 [loggers.py:111] Engine 000: Avg prompt throughput: 267.0 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 5.1%
|
| 122 |
+
INFO 01-04 14:04:34 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 5.1%
|
| 123 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 124 |
+
INFO 01-04 14:04:44 [loggers.py:111] Engine 000: Avg prompt throughput: 283.8 tokens/s, Avg generation throughput: 114.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 4.6%
|
| 125 |
+
INFO 01-04 14:04:54 [loggers.py:111] Engine 000: Avg prompt throughput: 182.6 tokens/s, Avg generation throughput: 139.4 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 4.2%
|
| 126 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 127 |
+
INFO 01-04 14:05:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 145.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 4.2%
|
| 128 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 129 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 130 |
+
INFO 01-04 14:05:14 [loggers.py:111] Engine 000: Avg prompt throughput: 99.6 tokens/s, Avg generation throughput: 91.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 7.6%
|
| 131 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 132 |
+
INFO 01-04 14:05:24 [loggers.py:111] Engine 000: Avg prompt throughput: 202.0 tokens/s, Avg generation throughput: 96.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 13.6%
|
| 133 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 134 |
+
INFO 01-04 14:05:34 [loggers.py:111] Engine 000: Avg prompt throughput: 140.5 tokens/s, Avg generation throughput: 121.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 12.9%
|
| 135 |
+
INFO 01-04 14:05:44 [loggers.py:111] Engine 000: Avg prompt throughput: 241.2 tokens/s, Avg generation throughput: 123.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 12.0%
|
| 136 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 137 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 138 |
+
INFO 01-04 14:05:54 [loggers.py:111] Engine 000: Avg prompt throughput: 211.4 tokens/s, Avg generation throughput: 96.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.3%
|
| 139 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 140 |
+
INFO 01-04 14:06:04 [loggers.py:111] Engine 000: Avg prompt throughput: 294.4 tokens/s, Avg generation throughput: 93.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 10.4%
|
| 141 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 142 |
+
INFO 01-04 14:06:14 [loggers.py:111] Engine 000: Avg prompt throughput: 236.0 tokens/s, Avg generation throughput: 112.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 15.1%
|
| 143 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 144 |
+
INFO 01-04 14:06:24 [loggers.py:111] Engine 000: Avg prompt throughput: 114.3 tokens/s, Avg generation throughput: 91.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 14.7%
|
| 145 |
+
INFO 01-04 14:06:34 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 14.7%
|
| 146 |
+
INFO 01-04 14:06:44 [loggers.py:111] Engine 000: Avg prompt throughput: 377.1 tokens/s, Avg generation throughput: 144.7 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, Prefix cache hit rate: 14.6%
|
| 147 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 148 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 149 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 150 |
+
INFO 01-04 14:06:54 [loggers.py:111] Engine 000: Avg prompt throughput: 166.6 tokens/s, Avg generation throughput: 148.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 15.3%
|
| 151 |
+
INFO 01-04 14:07:04 [loggers.py:111] Engine 000: Avg prompt throughput: 126.1 tokens/s, Avg generation throughput: 131.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.9%
|
| 152 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 153 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 154 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 155 |
+
INFO 01-04 14:07:14 [loggers.py:111] Engine 000: Avg prompt throughput: 242.7 tokens/s, Avg generation throughput: 82.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 14.2%
|
| 156 |
+
INFO 01-04 14:07:24 [loggers.py:111] Engine 000: Avg prompt throughput: 207.8 tokens/s, Avg generation throughput: 82.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 14.9%
|
| 157 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 158 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 159 |
+
INFO 01-04 14:07:34 [loggers.py:111] Engine 000: Avg prompt throughput: 277.3 tokens/s, Avg generation throughput: 50.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 14.1%
|
| 160 |
+
INFO 01-04 14:07:44 [loggers.py:111] Engine 000: Avg prompt throughput: 213.5 tokens/s, Avg generation throughput: 48.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 13.6%
|
| 161 |
+
INFO 01-04 14:07:54 [loggers.py:111] Engine 000: Avg prompt throughput: 329.5 tokens/s, Avg generation throughput: 91.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 12.9%
|
| 162 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 163 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 164 |
+
INFO 01-04 14:08:04 [loggers.py:111] Engine 000: Avg prompt throughput: 215.0 tokens/s, Avg generation throughput: 92.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 13.3%
|
| 165 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 166 |
+
INFO 01-04 14:08:14 [loggers.py:111] Engine 000: Avg prompt throughput: 189.1 tokens/s, Avg generation throughput: 96.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 12.9%
|
| 167 |
+
INFO 01-04 14:08:24 [loggers.py:111] Engine 000: Avg prompt throughput: 292.2 tokens/s, Avg generation throughput: 99.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 16.0%
|
| 168 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 169 |
+
INFO 01-04 14:08:34 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 124.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 16.0%
|
| 170 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 171 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 172 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 173 |
+
INFO 01-04 14:08:44 [loggers.py:111] Engine 000: Avg prompt throughput: 254.9 tokens/s, Avg generation throughput: 87.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.4%
|
| 174 |
+
INFO 01-04 14:08:54 [loggers.py:111] Engine 000: Avg prompt throughput: 200.4 tokens/s, Avg generation throughput: 39.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 15.0%
|
| 175 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 176 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 177 |
+
INFO 01-04 14:09:04 [loggers.py:111] Engine 000: Avg prompt throughput: 256.8 tokens/s, Avg generation throughput: 22.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
|
| 178 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 179 |
+
INFO 01-04 14:09:14 [loggers.py:111] Engine 000: Avg prompt throughput: 539.6 tokens/s, Avg generation throughput: 51.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 13.5%
|
| 180 |
+
INFO 01-04 14:09:24 [loggers.py:111] Engine 000: Avg prompt throughput: 264.5 tokens/s, Avg generation throughput: 59.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 13.1%
|
| 181 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 182 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 183 |
+
INFO 01-04 14:09:34 [loggers.py:111] Engine 000: Avg prompt throughput: 244.4 tokens/s, Avg generation throughput: 45.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 12.7%
|
| 184 |
+
INFO 01-04 14:09:44 [loggers.py:111] Engine 000: Avg prompt throughput: 398.8 tokens/s, Avg generation throughput: 57.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.1%
|
| 185 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 186 |
+
INFO 01-04 14:09:54 [loggers.py:111] Engine 000: Avg prompt throughput: 362.1 tokens/s, Avg generation throughput: 101.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 11.6%
|
| 187 |
+
INFO: INFO 01-04 14:09:57 [loggers.py:111] Engine 000: Avg proINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 188 |
+
INFO 01-04 14:10:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 189 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:10:14 [loggers.py:111] Engine 000: Avg prompt throughput: 448.8 tokens/s, Avg generation throughput: 19.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 11.1%
|
| 190 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 191 |
+
INFO 01-04 14:10:24 [loggers.py:111] Engine 000: Avg prompt throughput: 281.9 tokens/s, Avg generation throughput: 58.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 10.8%
|
| 192 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 193 |
+
INFO 01-04 14:10:34 [loggers.py:111] Engine 000: Avg prompt throughput: 286.5 tokens/s, Avg generation throughput: 31.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 10.5%
|
| 194 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 195 |
+
INFO 01-04 14:10:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.5%
|
| 196 |
+
INFO 01-04 14:10:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.5%
|
| 197 |
+
INFO 01-04 14:11:04 [loggers.py:111] Engine 000: Avg prompt throughput: 267.6 tokens/s, Avg generation throughput: 21.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 10.3%
|
| 198 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 199 |
+
INFO 01-04 14:11:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.3%
|
| 200 |
+
INFO 01-04 14:11:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.3%
|
| 201 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 202 |
+
INFO 01-04 14:11:34 [loggers.py:111] Engine 000: Avg prompt throughput: 397.6 tokens/s, Avg generation throughput: 34.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 9.9%
|
| 203 |
+
INFO 01-04 14:11:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0INFO 01-04 14:11:46 [loggers.pyINFO 01-04 14:11:54 [loggers.py:111] Engine 000: Avg prompt throughput: 305.7 tokens/s, Avg generation throughput: 2.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 9.6%
|
| 204 |
+
INFO 01-04 14:12:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 9.6%
|
| 205 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 206 |
+
INFO 01-04 14:12:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:12:16 [loggers.py:11INFO 01-04 14:12:24 [loggers.py:111] Engine 000: Avg prompt throughput: 527.5 tokens/s, Avg generation throughpINFO 01-04 14:12:29 [loggers.py:111] Engine 000: Avg prompt throughput: 189.1 tokens/s, Avg generationINFO 01-04 14:12:37 [loggers.py:111] Engine 000: Avg prompt throughput: 322.6 tokens/s, Avg generation throughput: 1INFO 01-04 14:12:39 [loggers.py:111] Engine 000: Avg prompt throughput: 528.0 tokens/s, Avg generation INFO 01-04 14:12:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage:INFO 01-04 14:12:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:12:59 [loggers.py:111] EnINFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:13:09 [loggers.py:111] EnINFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 507.0 tokens/s, Avg generation throughput: 19.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:13:19 [loggers.py:111] EngINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 207 |
+
INFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 577.2 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.9%
|
| 208 |
+
INFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 15.9%
|
| 209 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 210 |
+
INFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 950.4 tokens/s, Avg generation throughput: 58.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 16.5%
|
| 211 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 212 |
+
INFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 87.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.5%
|
| 213 |
+
INFO: 10.43.30.3:0 -INFO 01-04 14:13:59 [loggers.py:111] EngINFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 286.2 tokens/s, Avg generation throughput: 27.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache uINFO: 10.43.30.4:0 - "POST /v1/compleINFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 391.1 tokens/s, Avg generation throughput: 70.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 17.9%
|
| 214 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 215 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 216 |
+
INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 312.5 tokens/s, Avg generation throughput: 115.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 17.5%
|
| 217 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 218 |
+
INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 975.2 tokens/s, Avg generation throughput: 55.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:14:39 [loggers.py:111] Engine 000: AvINFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 16.5%
|
| 219 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 220 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 221 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 222 |
+
INFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 656.0 tokens/s, Avg generation throughput: 56.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 17.6%
|
| 223 |
+
INFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 436.1 tokens/s, Avg generation throughput: 78.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 18.2%
|
| 224 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 225 |
+
INFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 444.8 tokens/s, Avg generation throughput: 100.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 17.7%
|
| 226 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 227 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 228 |
+
INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 528.0 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 17.3%
|
| 229 |
+
INFO: 10.46.INFO: 10.43.30.3:0 - "POST /v1/completions HTTINFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg prompt throughput: 385.4 tokens/s, Avg generation throughput: 40.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 16.9%
|
| 230 |
+
INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 353.2 tokens/s, Avg generation throughput: 83.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 17.1%
|
| 231 |
+
INFO: INFO 01-04 14:15:49 [loggers.py:111] Engine 000: Avg prompINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 232 |
+
INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 433.8 tokens/s, Avg generation throughput: 43.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 18.6%
|
| 233 |
+
INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 18.6%
|
| 234 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 235 |
+
INFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 1148.9 tokens/s, Avg generation throughput: 66.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:16:19 [loggers.py:111] Engine 000: Avg proINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 236 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 237 |
+
INFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 348.1 tokens/s, Avg geneINFO 01-04 14:16:29 [loggers.py:111] Engine 000: Avg prompt throughput: 559.7 tokens/s, Avg generation throughput: 106.4 toINFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:16:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 134.1 tINFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 740.6 tokens/s, Avg generaINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 238 |
+
INFO 01-04 14:16:49 [loggers.py:111] Engine 000: Avg promINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 239 |
+
INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 506.6 tokens/s, Avg generation throughput: 126.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 18.2%
|
| 240 |
+
INFO: 10INFO 01-04 14:16:59 [loggers.py:111] Engine 000: AvgINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 241 |
+
INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 421.8 tokens/s, Avg generation throughput: 88.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 17.9%
|
| 242 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 243 |
+
INFO: 10.46.17.192INFO 01-04 14:17:14 [loggers.py:111] Engine INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:17:19 [loggers.py:111] Engine 000: Avg prompt throughput: 6INFO 01-04 14:17:24 [loggers.py:111] EngiINFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 522.6 tokens/s, Avg generation throughput: 44.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 19.4%
|
| 244 |
+
INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 908.0 tokens/s, Avg generation throughput: 92.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 18.7%
|
| 245 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 246 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 247 |
+
INFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 459.1 tokens/s, Avg generation tINFO 01-04 14:17:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.INFO: 10.46.50.192:0 INFO 01-04 14:17:54 [loggers.py:111] EngiINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughput: 477.8 tokens/s, Avg generatINFO 01-04 14:17:59 [loggers.py:111] Engine 000: Avg prompt throughput: 1344.9 INFO 01-04 14:18:04 [loggers.py:111] EngiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 248 |
+
INFO 01-04 14:18:07 [loggers.py:111] Engine 000: Avg prompt throughput: 594.1 tokens/s, Avg generation throughpuINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 249 |
+
INFO 01-04 14:18:14 [loggers.py:111] EnginINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 250 |
+
INFO 01-04 14:18:17 [loggers.py:1INFO 01-04 14:18:19 [loggers.py:111] Engine 000: Avg prompt throughput: 967.8 tINFO 01-04 14:18:24 [loggers.py:111] Engine 000: Avg prompt throughput: 480.5 tokens/s, Avg generation throINFO 01-04 14:18:27 [loggers.py:111] Engine 000: Avg prompt throughput: 594.8 tokens/s, Avg generatINFO 01-04 14:18:29 [loggers.py:111] Engine 000: Avg prompt throughput: 537.9 toINFO: 10.46.50.192:0 - "POST /v1/comINFO 01-04 14:18:37 [loggers.py:111] Engine 000: Avg prompt throughput: 747.7 tokens/s, Avg generation throughput: 128.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.3%, Prefix cache hit rate: 17.6%
|
| 251 |
+
INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 252 |
+
INFO: 10.46.50.192:0 - "INFO 01-04 14:18:44 [loggers.py:111] Engine 000: Avg prompt throughput: 414.9 tokens/s, Avg generationINFO 01-04 14:18:47 [loggers.py:111INFO 01-04 14:18:49 [loggers.py:111] Engine 000: Avg prompt throughput: 554.6 tokeINFO 01-04 14:18:54 [loggers.py:111] Engine 000: Avg prompt throughput: 334.6 tokens/s, Avg generationINFO 01-04 14:18:57 [loggers.py:111INFO 01-04 14:18:59 [loggers.py:111] Engine 000: Avg prompt throughput: 750.9 tokens/s, Avg generation throughput: 62.4 tokens/s, Running: 2 reqs, WaINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 253 |
+
INFO 01-04 14:19:04 [loggers.py:111INFO: 10.45.190.192:0 - "POST /INFO 01-04 14:19:09 [loggers.py:111] Engine 000: Avg prompt throughput: 679.8 tokens/s, Avg generation throughput: 68.7 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:19:14 [loggers.py:111] Engine 000: Avg prompt throughput: 277.5 tokens/s, Avg generatioINFO 01-04 14:19:17 [loggers.py:111] Engine 000: Avg prompt throughput: 412.3 tokens/s, Avg generaINFO 01-04 14:19:19 INFO 01-04 14:19:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 137.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 254 |
+
INFO 01-04 14:19:27 [loggers.pyINFO 01-04 14:19:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 255 |
+
INFO 01-04 14:19:34 [loggers.py:111] Engine 000: Avg prompt throughput: 609.1 tokens/s, Avg generation INFO 01-04 14:19:37 [loggers.py:1INFO 01-04 14:19:39 [loggers.py:111] Engine 000: Avg prompt throughput: 327.6 tokens/s, Avg generation throughput: 53.4 tokens/s, Running: 2 reqs, WINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 256 |
+
INFOINFO 01-04 14:19:49 [loggers.py:111] Engine 000: Avg prompt throughput: 630.0 tokens/s, Avg generation throughput: 88.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: INFO: INFO: 10.43.30.5:0 - "POST /v1INFO: 10.46.17.192:0 - "POINFO 01-04 14:19:59 [loggers.py:111] Engine 000: Avg prompt throughput: 704.9 tokens/s, Avg generation throughput: 98.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.7%, Prefix cache hit rate: 11.INFO 01-04 14:20:04 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.43.30.5:0 - "POST /v1INFO 01-04 14:20:07 [loggers.pINFO 01-04 14:20:09 [loggers.py:111] Engine 000: Avg prompt throughput: 534.9 tokens/sINFO 01-04 14:20:14 [loggers.py:111] Engine 000: Avg prompt throughput: 453.0 tokens/s, Avg generation INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 257 |
+
INFO 01-04 14:20:17 [loggers.pINFO 01-04 14:20:19 [loggers.py:111] Engine 000: Avg prompt throughput: 646.6 tokens/INFO 01-04 14:20:24 [loggers.py:111] Engine 000: Avg prompt throughput: 617.5 tokens/s, Avg generation throughput: 75.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:20:27 [loggersINFO 01-04 14:20:29 [INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 258 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 259 |
+
INFO 01-04 14:20:34 [loggers.py:111] Engine 000: Avg prompt throughput: 482.5 tokens/s, Avg generation INFO 01-04 14:20:37 [loggers.INFO 01-04 14:20:39 [loggers.py:111] Engine 000: Avg prompt throughput: 420.4 tokens/s,INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 260 |
+
INFO 01-04 14:20:44 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:20:49 [loggers.py:111] Engine 000: Avg prompt throughput: 451.3 tokens/s, Avg generation throughput: 53.8 tokens/s, Running: 2 reqs, WaitingINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 261 |
+
INFO 01-04 14:20:59 [loggers.py:111] Engine 000: Avg prompt throughput: 964.4 tokens/s, Avg generation throughput: 75.3 tokens/s, Running: 2 reqs, WaitingINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 262 |
+
IINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 263 |
+
INFO 01-04 14:21:09 [loggers.py:111] Engine 000: Avg prompt throughput: 697.4 tokens/s, Avg generation throughput: 104.3 tokens/s, Running: 2 reqs, WaINFO 01-04 14:21:14 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.43.30.5:0 - "POST /vINFO 01-04 14:21:17 [loggers.pyINFO 01-04 14:21:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 14:21:24 [loggers.py:111] Engine 000: Avg prompt throughput: 751.5 tokens/s, Avg generatioINFO 01-04 14:21:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1347.5 tokens/s, Avg geINFO 01-04 14:21:29 [loggers.py:111] Engine 000: Avg prompt throughput: 184.8 tokens/s,INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 264 |
+
INFO 01-04 14:21:34 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:21:39 [loggers.py:111] Engine 000: Avg prompt throughput: 641.2 tokens/s, Avg generation throughput: 91.1 tokens/s, Running: 2 reqs, WaitINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 265 |
+
INFOINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 266 |
+
INFO 01-04 14:21:49 [loggers.py:111] Engine 000: Avg prompt throughput: 711.2 tokens/s, Avg generation throughput: 100.0 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:21:54 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:21:59 [loggers.py:111] Engine 000: Avg prompt throughput: 577.5 tokens/s, Avg generation throughput: 101.3 tokens/s, Running: 3 reqs, WaINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 267 |
+
INFINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 268 |
+
INFO 01-04 14:22:09 [loggers.py:111] Engine 000: Avg prompt throughput: 222.2 tokens/s, Avg generation throughput: 118.2 tokens/s, Running: 3 reqs, WINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 269 |
+
INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 270 |
+
INFO 01-04 14:22:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 120.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: INFO 0INFO 01-04 14:22:29 [loggers.py:111] Engine 000: Avg prompt throughput: 612.5 tokens/s, Avg generation throughput: 87.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hit rate: INFO 01-04 14:22:34 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:22:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 100.2 tokens/s, Running: 2 reqs, WINFO 01-04 14:22:44 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 271 |
+
INFO 01-04 14:22:49 [loggers.py:111] Engine 000: Avg prompt throughput: 531.4 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 272 |
+
INFO 01-04 14:22:54 [loggers.py:111] Engine 000: Avg prompt throughput: 829.4 tokens/s, Avg generation throughput: 100.2 tokens/s, Running:INFO 01-04 14:22:59 [loggers.py:111] Engine 000: Avg prompt throughput: 568.1 tokINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 273 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 274 |
+
INFO 0INFO 01-04 14:23:09 [loggers.py:111] Engine 000: Avg prompt throughput: 657.1 tokens/s, Avg generation throughput: 116.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.9%, Prefix cache hit rate:INFO 01-04 14:23:14 [loggers.py:111] Engine 000: Avg prompt throughput: 74INFO 01-04 14:23:19 [loggers.py:111] Engine 000: Avg prompt throughput: 807.2 tokens/s, Avg generation throughput: 97.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.7%, Prefix cache hit rateINFO: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:23:29 [loggers.py:111] Engine 000: Avg prompt throughput: 713.3 tokens/s, Avg generation throughput: 133.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.7%, Prefix cache hitINFO 01-04 14INFO 01-04 14:23:39 [loggers.py:111] Engine 000: Avg prompt throughput: 404.1 tokens/s, Avg generation throughput: 138.4 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.9%, Prefix cache hit rate: 11.3%
|
| 275 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTINFO 01-04 14:INFO 01-04 14:23:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 141.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.3%, Prefix cache hitINFO 01-04 14:23:54 [loggers.py:111] Engine 000: Avg prompt throughput: 569.3 tINFO 01-04 14:23:59 [loggers.py:111] Engine 000: Avg prompt throughput: 609.9 tokens/s, Avg generation throughput: 138.9 tokens/s, Running: INFO 01-04 14:24:04 [loggers.py:111] Engine 000: Avg prompt throughput: 735.8 toINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 276 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 277 |
+
INFO 01-04 14:24:09 [loggers.py:111] Engine 000: Avg prompt throughput: 90INFO 01-04 14:24:14 [loggers.py:111] Engine 000: Avg prompt throughput: 849.5 tokens/s, Avg generation throughput: 98.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.2%, Prefix cache hit rate: INFO 01-04 14:24:19 [loggers.py:111] Engine 000: Avg prompt throughput: 652.3 tokens/s, Avg generation throughput: 67.9 tokens/s, RunninINFO 01-04 14:24:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:24:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cacINFO 01-04 14:24:34INFO 01-04 14:24:39 [loggers.py:111] Engine 000: Avg prompt throughput: 728.4 tokens/s, Avg generation throughput: 107.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.0%, Prefix cINFO: 10.46.50.19INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 278 |
+
INFO 01-04 14:24:49 [loggers.py:111] Engine 000: Avg prompt throughput: 478.7 tokens/s, Avg generation throughput: 140.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.3%, PrefINFO 01-04 14:24:54 [loggers.py:111] Engine 000: Avg prompt throughput: 939.1 tokens/s, AINFO: 10INFO 01-04 14:24:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:24:59 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:25:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:25:07 [loggers.py:111] Engine 000: Avg INFO 01-04 14:25:09 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:25:14 [loggers.py:111] Engine 000: Avg prompt throughput: 712.9 tokens/s, Avg generation throughput: 25.6 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:25:19 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:25:24 [loggers.py:111] Engine 000: Avg prompt throughput: 599.7 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KINFO: 10.45.190.192:0 - "POST /v1/completions INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 279 |
+
INFO 01-04 14:25:34 [loggers.py:111] Engine 000: Avg prompt throughput: 511.0 tokens/s, Avg generation throughput: 76.0 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:25:39 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:25:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 11.5%
|
| 280 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 281 |
+
INFO 01-04 14:25:54 [loggers.py:111] Engine 000: Avg prompt throughput: 345.0 tokens/s, Avg generation throughput: 81.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 11.4%
|
| 282 |
+
INFO: INFO 01-04 14:25:59 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:26:04 [loggers.py:111] Engine 000: Avg prompt throughput: 577.1 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:26:07 [loggers.py:111] Engine 000: AINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 283 |
+
INFO 01-04 14:26:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO 01-04 14:26:17 [loggers.py:111] Engine 000: Avg prompt throughput: 798.1 tokens/s, Avg generation throughput: 84.2 toINFO 01-04 14:26:19 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:26:24 [loggers.py:111] EINFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt throughput: 589.4 tokens/s, Avg generation throughput: 119.4INFO 01-04 14:26:29 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:26:34 [loggers.py:111] Engine 000: Avg prompt throughput: 827.0 tokens/s, Avg generation throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 284 |
+
INFO 01-04 14:26:37 [loggers.py:111] Engine 00INFO 01-04 14:26:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 13.8%IINFO 01-04 14:26:54 [loggers.py:111] Engine 000: Avg prompt throughput: 975.8 tokens/s, Avg generation throughput: 71.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.6%, Prefix cache hit rate: 13.5%
|
| 285 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 286 |
+
INFO 01-04 14:27:04 [loggers.py:111] Engine 000: Avg prompt throughput: 809.6 tokens/s, Avg generation throughput: 100.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.46.17.192:0 - "POST /v1/completions HTINFO 01-04 14:27:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.8%, Prefix cache hit rate: 13.3%
|
| 287 |
+
INFO: 10.46.1INFO 01-04 14:27:17 [loggers.py:111] Engine 000: Avg prompt throughput: 825.7 tokens/s, Avg generation throughput: INFO 01-04 14:27:24 [loggers.py:111] Engine 000: Avg prompt throughput: 1172.5 tokens/s, Avg generation throughput: 41.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFOINFO 01-04 14:27:27 [loggers.py:111] Engine 000: INFO 01-04 14:27:34 [loggers.py:111] Engine 000: Avg prompt throughput: 805.6 tokens/s, Avg generation throughput: 61.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.45.190.192:0 - "POST /v1/completions HINFO 01-04 14:27:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 12.7%
|
| 288 |
+
INFO: 10.46.INFO 01-04 14:27:47 [loggers.py:111] Engine 000: AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 289 |
+
INFO 01-04 14:27:54 [loggers.py:111] Engine 000: Avg prompt throughput: 623.8 tokens/s, Avg generation INFO 01-04 14:27:57 [loggers.py:111] Engine 000: AINFO 01-04 14:27:59 [loggers.py:111] Engine 000: Avg prompt througINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 290 |
+
INFO 01-04 14:28:04 [loggers.py:111] Engine 000: Avg prompt throughput: 420.2 tokens/s,INFO: 10.43.30INFO: 10.43.30.5:0 - "POST /v1/completionsINFO 01-04 14:28:09 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:28:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, WaitinINFO: 10.45.INFO 01-04 14:28:17 [loggers.py:111] Engine 000: AIINFO 01-04 14:28:19 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:28:24 [loggers.py:111] Engine 000: Avg prompt throughput: 852.0 tokens/s, Avg generation INFO 01-04 14:28:27 [loggers.py:111] Engine 000: INFO 01-04 14:28:29 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:28:34 [loggers.py:111] Engine 000: Avg prompt throughput: 901.2 tokens/s, Avg generation throughput: 75.6 tokens/s, Running: 2 reqs, WaitiINFO 01-04 14:28:39 [loggers.py:111] Engine 000: Avg INFO 01-04 14:28:45 [loggers.py:111] Engine 000: Avg prompt throughput: 1033.5 tokens/s, Avg generation throughput: 25.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache uINFO 01-04 14:28:47 [loggers.py:111] EngiINFO 01-04 14:28:55 [loggers.py:111] Engine 000: Avg prompt throughput: 620.4 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 291 |
+
INFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 698.3 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
|
| 292 |
+
INFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
|
| 293 |
+
INFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 818.5 tokens/s, Avg generation throughput: 28.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 18.5%
|
| 294 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 295 |
+
INFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.5%
|
| 296 |
+
INFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:29:49 [loggers.py:111] Engine 000: Avg prompt throughput: 742.5 tokens/s, Avg generation throughput: 35.9 tokens/s, Running: 1 reqs, Waiting: 0 reqINFO 01-04 14:29:57 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:29:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:30:09 [loggers.py:111] Engine 000: Avg prompt throughput: 758.9 tokens/s, Avg generation throughput: 84.0 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO 01-04 14:30:17 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:30:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 297 |
+
INFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:30:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 58.5 tokens/s, Running: 1 reqs, Waiting: 0 reqINFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 298 |
+
INFO 01-04 14:30:39 [loggers.py:111] Engine 000: Avg prompt throughput: 573.3 tokens/s, Avg INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 299 |
+
INFO 01-04 14:30:47 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:30:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 INFO 01-04 14:30:57 [loggers.py:111] Engine 000: Avg prompt throughput: 758.2 tokens/s, Avg generation throughput: 37.2 tokens/INFO 01-04 14:30:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 300 |
+
INFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:31:09 [loggers.py:111] Engine 000: Avg prompt throughput: 778.2 tokens/s, Avg generation throughput: 27.9 tokens/s, Running: 1 reqs, Waiting: 0 INFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:31:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 rINFO 01-04 14:31:27 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:31:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 rINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 301 |
+
INFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:31:39 [loggers.py:111] Engine 000: Avg prompt throughput: 962.2 tokens/s, Avg generation throughput: 37.5 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:31:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RINFO 01-04 14:31:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO 01-04 14:31:57 [loggers.py:111] Engine 000: Avg prompt throughput: 500.0 tokens/s, Avg generation throughput: 30.0 tokens/sINFO 01-04 14:31:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO 01-04 14:32:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/INFO 01-04 14:32:09 [loggers.py:111] Engine 000: Avg prompt throughput: 937.4 tokens/s, Avg INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 302 |
+
INFO 01-04 14:32:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.5 tokens/s, INFO 01-04 14:32:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO 01-04 14:32:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RINFO 01-04 14:32:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO 01-04 14:32:47 [loggers.py:111] Engine 000: Avg prompt throughput: 708.9 tokens/s, Avg generation throughput: 33.6 tokens/sINFO 01-04 14:33:29 [loggers.py:111] Engine 000: Avg prompt throughput: 527.0 tokens/s, Avg generation throughput: 23.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 13.7%
|
| 303 |
+
INFO 01-04 14:33:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 13.7%
|
| 304 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 305 |
+
INFO 01-04 14:33:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.7%
|
| 306 |
+
INFO 01-04 14:33:59 [loggers.pINFO 01-04 14:34:17 [loggers.py:111] Engine 000: Avg prompt throughput: 988.6 tokens/s, Avg generation throughput: 31.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 15.5%
|
| 307 |
+
INFO 01-04 14:34:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 15.5%
|
| 308 |
+
INFO 01-04 14:34:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 15.5%
|
| 309 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 310 |
+
INFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 591.2 tokens/s, Avg generation throughput: 22.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.4%
|
| 311 |
+
INFO 01-04 14:34:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage:INFO 01-04 14:35:09 [loggers.py:111] Engine 000: Avg prompt throughput: 581.7 tokens/s, Avg generation throughput: 14.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:35:14 [loggers.py:111] Engine 000INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 312 |
+
INFO 01-04 14:35:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO 01-04 14:35:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughpuINFO 01-04 14:35:29 [loggers.py:1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 313 |
+
INFO 01-04 14:36:37 [loggers.py:111] Engine 000: Avg prompt throughput: 601.2 tokens/s, Avg generation throughput: 44.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
|
| 314 |
+
INFO 01-04 14:36:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
|
| 315 |
+
INFO 01-04 14:38:27 [loggers.py:111] Engine 000: Avg prompt throughput: 630.3 tokens/s, Avg generation throughput: 17.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 15.1%
|
| 316 |
+
INFO 01-04 14:38:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 15.1%
|
| 317 |
+
INFO 01-04 14:38:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.1%
|
| 318 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 319 |
+
INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
|
| 320 |
+
INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
|
| 321 |
+
INFO 01-04 14:39:57 [loggers.py:111] Engine 000: Avg prompt throughput: 583.0 tokens/s, Avg generation throughput: 24.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.0%
|
| 322 |
+
INFO 01-04 14:40:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 15.0%
|
| 323 |
+
INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 324 |
+
INFO 01-04 14:40:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
|
| 325 |
+
INFO 01-04 14:40:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
|
| 326 |
+
INFO 01-04 14:41:57 [loggers.py:111] Engine 000: Avg prompt throughput: 609.1 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 14.8%
|
| 327 |
+
INFO 01-04 14:42:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.8%
|
| 328 |
+
INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 329 |
+
INFO 01-04 14:42:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
|
| 330 |
+
INFO 01-04 14:42:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
|
| 331 |
+
INFO 01-04 14:42:47 [loggers.py:111] Engine 000: Avg prompt throughput: 720.5 tokens/s, Avg generation throughput: 32.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 14.7%
|
| 332 |
+
INFO 01-04 14:42:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 14.7%
|
| 333 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 334 |
+
INFO 01-04 14:43:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 21.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.7%
|
| 335 |
+
INFO 01-04 14INFO 01-04 14:43:25 [loggers.py:111] Engine 000: Avg prompt throughput: 1802.9 tokens/s, Avg generation throughput: 20.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.2%, Prefix cacheINFO 01-04 14:47:27 [loggers.py:111] Engine 000: Avg prompt throughput: 773.2 tokens/s, Avg generation throughput: 38.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 15.4%
|
| 336 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 337 |
+
INFO 01-04 14:47:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.4%
|
| 338 |
+
INFO 01-04 14:47:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.4%
|
| 339 |
+
INFO 01-04 15:03:07 [loggers.py:111] Engine 000: Avg prompt throughput: 618.6 tokens/s, Avg generation throughput: 25.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 15.3%
|
| 340 |
+
INFO 01-04 15:03:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 15.3%
|
| 341 |
+
INFO 01-04 15:03:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.3%
|
| 342 |
+
INFO 01-04 15:03:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 15.3%
|
| 343 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 344 |
+
INFO 01-04 15:03:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 37.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
|
| 345 |
+
INFO 01-04 15:03:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
|
| 346 |
+
.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 347 |
+
INFO 01-04 14:47:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usageINFO 01-04 14:48:45 [loggers.py:111] Engine 000: Avg prompt throughput: 814.9 tokens/s, Avg generation throughput: 33.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 14.9%
|
| 348 |
+
INFO 01-04 14:48:55 [loggers.py:11INFO 01-04 14:54:17 [loggers.py:111] Engine 000: Avg prompt throughput: 420.7 tokens/s, Avg generation throughput: 10.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 19.3%
|
| 349 |
+
INFO 01-04 14:54:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, RunINFO 01-04 14:58:09 [loggers.py:111] Engine 000: Avg prompt throughput: 454.0 tokens/s, Avg generation throughput: 16.0 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:59:06 [loggers.py:111] Engine 000: Avg prompt throughput: 573.5 tokens/s, Avg generation throughput: 32.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.5%
|
| 350 |
+
INFO 01-04 14:59:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.5%
|
| 351 |
+
INFO 01-04 14:59:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.5%
|
| 352 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 353 |
+
INFO 01-04 14:59:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
|
| 354 |
+
INFO 01-04 14:59:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
|
| 355 |
+
.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 13.5%
|
| 356 |
+
INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 357 |
+
INFO 01-04 14:50:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.5%
|
| 358 |
+
INFO 01-04 14:50:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.5%
|
| 359 |
+
15.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
|
| 360 |
+
INFO 01-04 14:48:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
|
| 361 |
+
INFO 01-04 14:59:34 [loggers.py:111] Engine 000: Avg prompt throughput: 579.9 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 14.8%
|
| 362 |
+
INFO 01-04 14:59:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 14.8%
|
| 363 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 364 |
+
INFO 01-04 14:59:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 26.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
|
| 365 |
+
INFO 01-04 15:00:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
|
| 366 |
+
y:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.3%
|
| 367 |
+
INFO 01-04 14:47:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.3%
|
| 368 |
+
INFO 01-04 14:48:11 [loggers.py:111] Engine 000: Avg prompt throughput: 797.4 tokens/s, Avg generation throughput: 21.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 14.1%
|
| 369 |
+
INFO 01-04 14:48:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 14.1%
|
| 370 |
+
INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 371 |
+
INFO 01-04 14:48:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
|
| 372 |
+
INFO 01-04 14:48:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
|
| 373 |
+
INFO 01-04 14:57:21 [loggers.py:111] Engine 000: Avg prompt throughput: 554.8 tokens/s, Avg generation throughput: 2.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 14.5%
|
| 374 |
+
INFO 01-04 14:57:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 14.5%
|
| 375 |
+
INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
|
| 376 |
+
INFO 01-04 14:57:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 44.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
|
| 377 |
+
INFO 01-04 14:57:51 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
|
hf_ip/vllm_instances.txt
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
127.0.0.1:8001
|
| 2 |
+
127.0.0.1:8002
|
| 3 |
+
127.0.0.1:8003
|
| 4 |
+
127.0.0.1:8004
|
| 5 |
+
127.0.0.1:8005
|
| 6 |
+
127.0.0.1:8006
|
| 7 |
+
127.0.0.1:8007
|
| 8 |
+
127.0.0.1:8008
|
| 9 |
+
127.0.0.1:8001
|
| 10 |
+
127.0.0.1:8002
|
| 11 |
+
127.0.0.1:8003
|
| 12 |
+
127.0.0.1:8004
|
| 13 |
+
127.0.0.1:8005
|
| 14 |
+
127.0.0.1:8006
|
| 15 |
+
127.0.0.1:8007
|
| 16 |
+
127.0.0.1:8008
|
| 17 |
+
127.0.0.1:8001
|
| 18 |
+
127.0.0.1:8002
|
| 19 |
+
127.0.0.1:8003
|
| 20 |
+
127.0.0.1:8004
|
| 21 |
+
127.0.0.1:8005
|
| 22 |
+
127.0.0.1:8006
|
| 23 |
+
127.0.0.1:8007
|
| 24 |
+
127.0.0.1:8008
|
| 25 |
+
127.0.0.1:8001
|
| 26 |
+
127.0.0.1:8002
|
| 27 |
+
127.0.0.1:8001
|
| 28 |
+
127.0.0.1:8003
|
| 29 |
+
127.0.0.1:8002
|
| 30 |
+
127.0.0.1:8004
|
| 31 |
+
127.0.0.1:8003
|
| 32 |
+
127.0.0.1:8005
|
| 33 |
+
127.0.0.1:8004
|
| 34 |
+
127.0.0.1:8001
|
| 35 |
+
127.0.0.1:8001
|
| 36 |
+
127.0.0.1:8006
|
| 37 |
+
127.0.0.1:8005
|
| 38 |
+
127.0.0.1:8002
|
| 39 |
+
127.0.0.1:8001
|
| 40 |
+
127.0.0.1:8002
|
| 41 |
+
127.0.0.1:8007
|
| 42 |
+
127.0.0.1:8001
|
| 43 |
+
127.0.0.1:8006
|
| 44 |
+
127.0.0.1:8003
|
| 45 |
+
127.0.0.1:8002
|
| 46 |
+
127.0.0.1:8003
|
| 47 |
+
127.0.0.1:8008
|
| 48 |
+
127.0.0.1:8002
|
| 49 |
+
127.0.0.1:8007
|
| 50 |
+
127.0.0.1:8004
|
| 51 |
+
127.0.0.1:8003
|
| 52 |
+
127.0.0.1:8001
|
| 53 |
+
127.0.0.1:8004
|
| 54 |
+
127.0.0.1:8003
|
| 55 |
+
127.0.0.1:8008
|
| 56 |
+
127.0.0.1:8005
|
| 57 |
+
127.0.0.1:8001
|
| 58 |
+
127.0.0.1:8004
|
| 59 |
+
127.0.0.1:8002
|
| 60 |
+
127.0.0.1:8005
|
| 61 |
+
127.0.0.1:8004
|
| 62 |
+
127.0.0.1:8006
|
| 63 |
+
127.0.0.1:8002
|
| 64 |
+
127.0.0.1:8005
|
| 65 |
+
127.0.0.1:8003
|
| 66 |
+
127.0.0.1:8006
|
| 67 |
+
127.0.0.1:8005
|
| 68 |
+
127.0.0.1:8007
|
| 69 |
+
127.0.0.1:8003
|
| 70 |
+
127.0.0.1:8006
|
| 71 |
+
127.0.0.1:8004
|
| 72 |
+
127.0.0.1:8007
|
| 73 |
+
127.0.0.1:8006
|
| 74 |
+
127.0.0.1:8008
|
| 75 |
+
127.0.0.1:8004
|
| 76 |
+
127.0.0.1:8007
|
| 77 |
+
127.0.0.1:8005
|
| 78 |
+
127.0.0.1:8008
|
| 79 |
+
127.0.0.1:8007
|
| 80 |
+
127.0.0.1:8005
|
| 81 |
+
127.0.0.1:8008
|
| 82 |
+
127.0.0.1:8006
|
| 83 |
+
127.0.0.1:8008
|
| 84 |
+
127.0.0.1:8006
|
| 85 |
+
127.0.0.1:8007
|
| 86 |
+
127.0.0.1:8007
|
| 87 |
+
127.0.0.1:8008
|
| 88 |
+
127.0.0.1:8008
|
| 89 |
+
127.0.0.1:8001
|
| 90 |
+
127.0.0.1:8001
|
| 91 |
+
127.0.0.1:8001
|
| 92 |
+
127.0.0.1:8002
|
| 93 |
+
127.0.0.1:8002
|
| 94 |
+
127.0.0.1:8002
|
| 95 |
+
127.0.0.1:8003
|
| 96 |
+
127.0.0.1:8003
|
| 97 |
+
127.0.0.1:8003
|
| 98 |
+
127.0.0.1:8004
|
| 99 |
+
127.0.0.1:8004
|
| 100 |
+
127.0.0.1:8004
|
| 101 |
+
127.0.0.1:8005
|
| 102 |
+
127.0.0.1:8005
|
| 103 |
+
127.0.0.1:8005
|
| 104 |
+
127.0.0.1:8006
|
| 105 |
+
127.0.0.1:8006
|
| 106 |
+
127.0.0.1:8006
|
| 107 |
+
127.0.0.1:8007
|
| 108 |
+
127.0.0.1:8007
|
| 109 |
+
127.0.0.1:8007
|
| 110 |
+
127.0.0.1:8008
|
| 111 |
+
127.0.0.1:8008
|
| 112 |
+
127.0.0.1:8008
|
| 113 |
+
127.0.0.1:8001
|
| 114 |
+
127.0.0.1:8002
|
| 115 |
+
127.0.0.1:8003
|
| 116 |
+
127.0.0.1:8004
|
| 117 |
+
127.0.0.1:8005
|
| 118 |
+
127.0.0.1:8006
|
| 119 |
+
127.0.0.1:8001
|
| 120 |
+
127.0.0.1:8007
|
| 121 |
+
127.0.0.1:8002
|
| 122 |
+
127.0.0.1:8008
|
| 123 |
+
127.0.0.1:8003
|
| 124 |
+
127.0.0.1:8004
|
| 125 |
+
127.0.0.1:8001
|
| 126 |
+
127.0.0.1:8005
|
| 127 |
+
127.0.0.1:8002
|
| 128 |
+
127.0.0.1:8006
|
| 129 |
+
127.0.0.1:8001
|
| 130 |
+
127.0.0.1:8003
|
| 131 |
+
127.0.0.1:8007
|
| 132 |
+
127.0.0.1:8002
|
| 133 |
+
127.0.0.1:8001
|
| 134 |
+
127.0.0.1:8004
|
| 135 |
+
127.0.0.1:8008
|
| 136 |
+
127.0.0.1:8003
|
| 137 |
+
127.0.0.1:8002
|
| 138 |
+
127.0.0.1:8005
|
| 139 |
+
127.0.0.1:8004
|
| 140 |
+
127.0.0.1:8003
|
| 141 |
+
127.0.0.1:8006
|
| 142 |
+
127.0.0.1:8005
|
| 143 |
+
127.0.0.1:8004
|
| 144 |
+
127.0.0.1:8007
|
| 145 |
+
127.0.0.1:8006
|
| 146 |
+
127.0.0.1:8005
|
| 147 |
+
127.0.0.1:8008
|
| 148 |
+
127.0.0.1:8007
|
| 149 |
+
127.0.0.1:8006
|
| 150 |
+
127.0.0.1:8008
|
| 151 |
+
127.0.0.1:8007
|
| 152 |
+
127.0.0.1:8008
|
| 153 |
+
127.0.0.1:8001
|
| 154 |
+
127.0.0.1:8002
|
| 155 |
+
127.0.0.1:8003
|
| 156 |
+
127.0.0.1:8004
|
| 157 |
+
127.0.0.1:8005
|
| 158 |
+
127.0.0.1:8006
|
| 159 |
+
127.0.0.1:8007
|
| 160 |
+
127.0.0.1:8008
|
| 161 |
+
127.0.0.1:8001
|
| 162 |
+
127.0.0.1:8002
|
| 163 |
+
127.0.0.1:8003
|
| 164 |
+
127.0.0.1:8004
|
| 165 |
+
127.0.0.1:8005
|
| 166 |
+
127.0.0.1:8006
|
| 167 |
+
127.0.0.1:8007
|
| 168 |
+
127.0.0.1:8008
|
| 169 |
+
127.0.0.1:8001
|
| 170 |
+
127.0.0.1:8002
|
| 171 |
+
127.0.0.1:8003
|
| 172 |
+
127.0.0.1:8004
|
| 173 |
+
127.0.0.1:8005
|
| 174 |
+
127.0.0.1:8006
|
| 175 |
+
127.0.0.1:8007
|
| 176 |
+
127.0.0.1:8008
|
| 177 |
+
127.0.0.1:8001
|
| 178 |
+
127.0.0.1:8001
|
| 179 |
+
127.0.0.1:8002
|
| 180 |
+
127.0.0.1:8002
|
| 181 |
+
127.0.0.1:8003
|
| 182 |
+
127.0.0.1:8003
|
| 183 |
+
127.0.0.1:8004
|
| 184 |
+
127.0.0.1:8004
|
| 185 |
+
127.0.0.1:8005
|
| 186 |
+
127.0.0.1:8005
|
| 187 |
+
127.0.0.1:8006
|
| 188 |
+
127.0.0.1:8006
|
| 189 |
+
127.0.0.1:8007
|
| 190 |
+
127.0.0.1:8007
|
| 191 |
+
127.0.0.1:8008
|
| 192 |
+
127.0.0.1:8008
|
| 193 |
+
127.0.0.1:8001
|
| 194 |
+
127.0.0.1:8001
|
| 195 |
+
127.0.0.1:8002
|
| 196 |
+
127.0.0.1:8002
|
| 197 |
+
127.0.0.1:8003
|
| 198 |
+
127.0.0.1:8001
|
| 199 |
+
127.0.0.1:8003
|
| 200 |
+
127.0.0.1:8004
|
| 201 |
+
127.0.0.1:8002
|
| 202 |
+
127.0.0.1:8004
|
| 203 |
+
127.0.0.1:8005
|
| 204 |
+
127.0.0.1:8003
|
| 205 |
+
127.0.0.1:8005
|
| 206 |
+
127.0.0.1:8006
|
| 207 |
+
127.0.0.1:8004
|
| 208 |
+
127.0.0.1:8006
|
| 209 |
+
127.0.0.1:8007
|
| 210 |
+
127.0.0.1:8005
|
| 211 |
+
127.0.0.1:8007
|
| 212 |
+
127.0.0.1:8008
|
| 213 |
+
127.0.0.1:8006
|
| 214 |
+
127.0.0.1:8008
|
| 215 |
+
127.0.0.1:8007
|
| 216 |
+
127.0.0.1:8008
|
| 217 |
+
127.0.0.1:8001
|
| 218 |
+
127.0.0.1:8002
|
| 219 |
+
127.0.0.1:8001
|
| 220 |
+
127.0.0.1:8003
|
| 221 |
+
127.0.0.1:8002
|
| 222 |
+
127.0.0.1:8004
|
| 223 |
+
127.0.0.1:8003
|
| 224 |
+
127.0.0.1:8005
|
| 225 |
+
127.0.0.1:8004
|
| 226 |
+
127.0.0.1:8006
|
| 227 |
+
127.0.0.1:8005
|
| 228 |
+
127.0.0.1:8007
|
| 229 |
+
127.0.0.1:8006
|
| 230 |
+
127.0.0.1:8008
|
| 231 |
+
127.0.0.1:8007
|
| 232 |
+
127.0.0.1:8008
|