ADOHAHA123 commited on
Commit
316d145
·
verified ·
1 Parent(s): f96d103

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hf/tokenizer.json filter=lfs diff=lfs merge=lfs -text
data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c378db65a2203aa8a78056fbcf7fd2d9f96a608110397a3ee932d74213ac7c9
3
+ size 1492
hf/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
hf/chat_template.jinja ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
+ {%- for message in messages[::-1] %}
19
+ {%- set index = (messages|length - 1) - loop.index0 %}
20
+ {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
21
+ {%- set ns.multi_step_tool = false %}
22
+ {%- set ns.last_query_index = index %}
23
+ {%- endif %}
24
+ {%- endfor %}
25
+ {%- for message in messages %}
26
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
27
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
28
+ {%- elif message.role == "assistant" %}
29
+ {%- set content = message.content %}
30
+ {%- set reasoning_content = '' %}
31
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
32
+ {%- set reasoning_content = message.reasoning_content %}
33
+ {%- else %}
34
+ {%- if '</think>' in message.content %}
35
+ {%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
36
+ {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
37
+ {%- endif %}
38
+ {%- endif %}
39
+ {%- if loop.index0 > ns.last_query_index %}
40
+ {%- if loop.last or (not loop.last and reasoning_content) %}
41
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
42
+ {%- else %}
43
+ {{- '<|im_start|>' + message.role + '\n' + content }}
44
+ {%- endif %}
45
+ {%- else %}
46
+ {{- '<|im_start|>' + message.role + '\n' + content }}
47
+ {%- endif %}
48
+ {%- if message.tool_calls %}
49
+ {%- for tool_call in message.tool_calls %}
50
+ {%- if (loop.first and content) or (not loop.first) %}
51
+ {{- '\n' }}
52
+ {%- endif %}
53
+ {%- if tool_call.function %}
54
+ {%- set tool_call = tool_call.function %}
55
+ {%- endif %}
56
+ {{- '<tool_call>\n{"name": "' }}
57
+ {{- tool_call.name }}
58
+ {{- '", "arguments": ' }}
59
+ {%- if tool_call.arguments is string %}
60
+ {{- tool_call.arguments }}
61
+ {%- else %}
62
+ {{- tool_call.arguments | tojson }}
63
+ {%- endif %}
64
+ {{- '}\n</tool_call>' }}
65
+ {%- endfor %}
66
+ {%- endif %}
67
+ {{- '<|im_end|>\n' }}
68
+ {%- elif message.role == "tool" %}
69
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
70
+ {{- '<|im_start|>user' }}
71
+ {%- endif %}
72
+ {{- '\n<tool_response>\n' }}
73
+ {{- message.content }}
74
+ {{- '\n</tool_response>' }}
75
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
76
+ {{- '<|im_end|>\n' }}
77
+ {%- endif %}
78
+ {%- endif %}
79
+ {%- endfor %}
80
+ {%- if add_generation_prompt %}
81
+ {{- '<|im_start|>assistant\n' }}
82
+ {%- if enable_thinking is defined and enable_thinking is false %}
83
+ {{- '<think>\n\n</think>\n\n' }}
84
+ {%- endif %}
85
+ {%- endif %}
hf/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151645,
8
+ "head_dim": 128,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 5120,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 25600,
13
+ "max_position_embeddings": 131072,
14
+ "max_window_layers": 64,
15
+ "model_type": "qwen3",
16
+ "num_attention_heads": 64,
17
+ "num_hidden_layers": 64,
18
+ "num_key_value_heads": 8,
19
+ "pad_token_id": 151643,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": {
22
+ "factor": 4.0,
23
+ "original_max_position_embeddings": 32768,
24
+ "rope_type": "yarn"
25
+ },
26
+ "rope_theta": 1000000,
27
+ "sliding_window": null,
28
+ "tie_word_embeddings": false,
29
+ "torch_dtype": "bfloat16",
30
+ "transformers_version": "4.52.4",
31
+ "use_cache": true,
32
+ "use_sliding_window": false,
33
+ "vocab_size": 151936
34
+ }
hf/generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.52.4"
13
+ }
hf/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
hf/model-00001-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30e6f85e21a0e9d3483e8bb9e6010c80a8e797719552329ae795f76a26b52447
3
+ size 4928419424
hf/model-00002-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99db49822e9f64fd5502fd0d42e2e5a6fb43df4206fe161cb1c8120e32670637
3
+ size 4781605144
hf/model-00003-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07a88f5427346dfe5224e4b2c81368b18a293ae92b747c1e1a0cd18411032c5
3
+ size 4928450568
hf/model-00004-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5888990fba12830a1c24bc6cc470697284f57fa3ff9bc68abb2f27c15ec9ae08
3
+ size 4980813680
hf/model-00005-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:493e7423c2354cdc29e88cb6bbd439b9fe0a0c5dc44c9df7f912ef36f3de6997
3
+ size 4991315040
hf/model-00006-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:540b426e81ddf3232e20258648997d52ece0532c0d3b88742c0dcd232beedda3
3
+ size 4949367504
hf/model-00007-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39b294addd72f12cb2c04c6ff91856a20f48ffc5e5300508ee3aa261a51b86c0
3
+ size 4854996904
hf/model-00008-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5971c2dc9c302a74844a6a8c0ac3c18776892563d5d07c6d473dbdd2e46d5b
3
+ size 3565289480
hf/model-00009-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f948de65d6c0c4da9aa5c7b1b5634c10a92dd422f494c9554ffe06b9a68cd758
3
+ size 4890335312
hf/model-00010-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b48a6226ab1fead18741840d0c6c2ff7a713a182210468b5861586b9a05656c7
3
+ size 4785473976
hf/model-00011-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:307afed4ca7ac0aa48c4faf376bb3d654e143f2a6d4145b77416d8afc06cca2a
3
+ size 4834025448
hf/model-00012-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0533a6fe98af4e719b356da4f5a996defcd6614f66fbd5c3787497ef0f9894e
3
+ size 4792103184
hf/model-00013-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c9088b8ab86b8c928cdfbb9a38e390e0a0ac716285e348e5ed4fcfd0854c70
3
+ size 4949465424
hf/model-00014-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d997e89fb92893911b4214b4a51f7c6ed4ab25fccf47488ca5373b964e66e9
3
+ size 3292667304
hf/model.safetensors.index.json ADDED
@@ -0,0 +1,714 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 65524246528
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00009-of-00014.safetensors",
7
+ "model.embed_tokens.weight": "model-00010-of-00014.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00008-of-00014.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
13
+ "model.layers.0.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
15
+ "model.layers.0.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
16
+ "model.layers.0.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
17
+ "model.layers.0.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
18
+ "model.layers.0.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
19
+ "model.layers.1.input_layernorm.weight": "model-00010-of-00014.safetensors",
20
+ "model.layers.1.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
21
+ "model.layers.1.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
22
+ "model.layers.1.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
23
+ "model.layers.1.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
24
+ "model.layers.1.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
25
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
26
+ "model.layers.1.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
27
+ "model.layers.1.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
28
+ "model.layers.1.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
29
+ "model.layers.1.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
30
+ "model.layers.10.input_layernorm.weight": "model-00005-of-00014.safetensors",
31
+ "model.layers.10.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
32
+ "model.layers.10.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
33
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
34
+ "model.layers.10.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
35
+ "model.layers.10.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
36
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
37
+ "model.layers.10.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
38
+ "model.layers.10.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
39
+ "model.layers.10.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
40
+ "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
41
+ "model.layers.11.input_layernorm.weight": "model-00006-of-00014.safetensors",
42
+ "model.layers.11.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
43
+ "model.layers.11.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
44
+ "model.layers.11.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
45
+ "model.layers.11.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
46
+ "model.layers.11.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
47
+ "model.layers.11.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
48
+ "model.layers.11.self_attn.o_proj.weight": "model-00014-of-00014.safetensors",
49
+ "model.layers.11.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
50
+ "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
51
+ "model.layers.11.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
52
+ "model.layers.12.input_layernorm.weight": "model-00013-of-00014.safetensors",
53
+ "model.layers.12.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
54
+ "model.layers.12.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
55
+ "model.layers.12.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
56
+ "model.layers.12.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
57
+ "model.layers.12.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
58
+ "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
59
+ "model.layers.12.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
60
+ "model.layers.12.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
61
+ "model.layers.12.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
62
+ "model.layers.12.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
63
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00014.safetensors",
64
+ "model.layers.13.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
65
+ "model.layers.13.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
66
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
67
+ "model.layers.13.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
68
+ "model.layers.13.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
69
+ "model.layers.13.self_attn.k_proj.weight": "model-00014-of-00014.safetensors",
70
+ "model.layers.13.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
71
+ "model.layers.13.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
72
+ "model.layers.13.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
73
+ "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
74
+ "model.layers.14.input_layernorm.weight": "model-00013-of-00014.safetensors",
75
+ "model.layers.14.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
76
+ "model.layers.14.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
77
+ "model.layers.14.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
78
+ "model.layers.14.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
79
+ "model.layers.14.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
80
+ "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
81
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00014.safetensors",
82
+ "model.layers.14.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
83
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
84
+ "model.layers.14.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
85
+ "model.layers.15.input_layernorm.weight": "model-00012-of-00014.safetensors",
86
+ "model.layers.15.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
87
+ "model.layers.15.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
88
+ "model.layers.15.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
89
+ "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
90
+ "model.layers.15.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
91
+ "model.layers.15.self_attn.k_proj.weight": "model-00010-of-00014.safetensors",
92
+ "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
93
+ "model.layers.15.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
94
+ "model.layers.15.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
95
+ "model.layers.15.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
96
+ "model.layers.16.input_layernorm.weight": "model-00013-of-00014.safetensors",
97
+ "model.layers.16.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
98
+ "model.layers.16.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
99
+ "model.layers.16.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
100
+ "model.layers.16.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
101
+ "model.layers.16.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
102
+ "model.layers.16.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
103
+ "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
104
+ "model.layers.16.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
105
+ "model.layers.16.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
106
+ "model.layers.16.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
107
+ "model.layers.17.input_layernorm.weight": "model-00013-of-00014.safetensors",
108
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
109
+ "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
110
+ "model.layers.17.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
111
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
112
+ "model.layers.17.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
113
+ "model.layers.17.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
114
+ "model.layers.17.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
115
+ "model.layers.17.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
116
+ "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
117
+ "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
118
+ "model.layers.18.input_layernorm.weight": "model-00001-of-00014.safetensors",
119
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
120
+ "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
121
+ "model.layers.18.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
122
+ "model.layers.18.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
123
+ "model.layers.18.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
124
+ "model.layers.18.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
125
+ "model.layers.18.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
126
+ "model.layers.18.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
127
+ "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
128
+ "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
129
+ "model.layers.19.input_layernorm.weight": "model-00014-of-00014.safetensors",
130
+ "model.layers.19.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
131
+ "model.layers.19.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
132
+ "model.layers.19.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
133
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
134
+ "model.layers.19.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
135
+ "model.layers.19.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
136
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
137
+ "model.layers.19.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
138
+ "model.layers.19.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
139
+ "model.layers.19.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
140
+ "model.layers.2.input_layernorm.weight": "model-00008-of-00014.safetensors",
141
+ "model.layers.2.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
142
+ "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00014.safetensors",
143
+ "model.layers.2.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
144
+ "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
145
+ "model.layers.2.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
146
+ "model.layers.2.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
147
+ "model.layers.2.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
148
+ "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
149
+ "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
150
+ "model.layers.2.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
151
+ "model.layers.20.input_layernorm.weight": "model-00006-of-00014.safetensors",
152
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
153
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
154
+ "model.layers.20.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
155
+ "model.layers.20.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
156
+ "model.layers.20.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
157
+ "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
158
+ "model.layers.20.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
159
+ "model.layers.20.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
160
+ "model.layers.20.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
161
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
162
+ "model.layers.21.input_layernorm.weight": "model-00004-of-00014.safetensors",
163
+ "model.layers.21.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
164
+ "model.layers.21.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
165
+ "model.layers.21.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
166
+ "model.layers.21.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
167
+ "model.layers.21.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
168
+ "model.layers.21.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
169
+ "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
170
+ "model.layers.21.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
171
+ "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
172
+ "model.layers.21.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
173
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00014.safetensors",
174
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
175
+ "model.layers.22.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
176
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
177
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
178
+ "model.layers.22.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
179
+ "model.layers.22.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
180
+ "model.layers.22.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
181
+ "model.layers.22.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
182
+ "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
183
+ "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
184
+ "model.layers.23.input_layernorm.weight": "model-00009-of-00014.safetensors",
185
+ "model.layers.23.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
186
+ "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
187
+ "model.layers.23.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
188
+ "model.layers.23.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
189
+ "model.layers.23.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
190
+ "model.layers.23.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
191
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
192
+ "model.layers.23.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
193
+ "model.layers.23.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
194
+ "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
195
+ "model.layers.24.input_layernorm.weight": "model-00013-of-00014.safetensors",
196
+ "model.layers.24.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
197
+ "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
198
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
199
+ "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
200
+ "model.layers.24.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
201
+ "model.layers.24.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
202
+ "model.layers.24.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
203
+ "model.layers.24.self_attn.q_norm.weight": "model-00004-of-00014.safetensors",
204
+ "model.layers.24.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
205
+ "model.layers.24.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
206
+ "model.layers.25.input_layernorm.weight": "model-00008-of-00014.safetensors",
207
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
208
+ "model.layers.25.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
209
+ "model.layers.25.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
210
+ "model.layers.25.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
211
+ "model.layers.25.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
212
+ "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
213
+ "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
214
+ "model.layers.25.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
215
+ "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
216
+ "model.layers.25.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
217
+ "model.layers.26.input_layernorm.weight": "model-00014-of-00014.safetensors",
218
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
219
+ "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
220
+ "model.layers.26.mlp.up_proj.weight": "model-00009-of-00014.safetensors",
221
+ "model.layers.26.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
222
+ "model.layers.26.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
223
+ "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
224
+ "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
225
+ "model.layers.26.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
226
+ "model.layers.26.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
227
+ "model.layers.26.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
228
+ "model.layers.27.input_layernorm.weight": "model-00009-of-00014.safetensors",
229
+ "model.layers.27.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
230
+ "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
231
+ "model.layers.27.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
232
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
233
+ "model.layers.27.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
234
+ "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
235
+ "model.layers.27.self_attn.o_proj.weight": "model-00014-of-00014.safetensors",
236
+ "model.layers.27.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
237
+ "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
238
+ "model.layers.27.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
239
+ "model.layers.28.input_layernorm.weight": "model-00004-of-00014.safetensors",
240
+ "model.layers.28.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
241
+ "model.layers.28.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
242
+ "model.layers.28.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
243
+ "model.layers.28.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
244
+ "model.layers.28.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
245
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
246
+ "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
247
+ "model.layers.28.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
248
+ "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
249
+ "model.layers.28.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
250
+ "model.layers.29.input_layernorm.weight": "model-00013-of-00014.safetensors",
251
+ "model.layers.29.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
252
+ "model.layers.29.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
253
+ "model.layers.29.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
254
+ "model.layers.29.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
255
+ "model.layers.29.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
256
+ "model.layers.29.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
257
+ "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
258
+ "model.layers.29.self_attn.q_norm.weight": "model-00010-of-00014.safetensors",
259
+ "model.layers.29.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
260
+ "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
261
+ "model.layers.3.input_layernorm.weight": "model-00011-of-00014.safetensors",
262
+ "model.layers.3.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
263
+ "model.layers.3.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
264
+ "model.layers.3.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
265
+ "model.layers.3.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
266
+ "model.layers.3.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
267
+ "model.layers.3.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
268
+ "model.layers.3.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
269
+ "model.layers.3.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
270
+ "model.layers.3.self_attn.q_proj.weight": "model-00013-of-00014.safetensors",
271
+ "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
272
+ "model.layers.30.input_layernorm.weight": "model-00013-of-00014.safetensors",
273
+ "model.layers.30.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
274
+ "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00014.safetensors",
275
+ "model.layers.30.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
276
+ "model.layers.30.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
277
+ "model.layers.30.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
278
+ "model.layers.30.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
279
+ "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
280
+ "model.layers.30.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
281
+ "model.layers.30.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
282
+ "model.layers.30.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
283
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00014.safetensors",
284
+ "model.layers.31.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
285
+ "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
286
+ "model.layers.31.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
287
+ "model.layers.31.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
288
+ "model.layers.31.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
289
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
290
+ "model.layers.31.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
291
+ "model.layers.31.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
292
+ "model.layers.31.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
293
+ "model.layers.31.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
294
+ "model.layers.32.input_layernorm.weight": "model-00013-of-00014.safetensors",
295
+ "model.layers.32.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
296
+ "model.layers.32.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
297
+ "model.layers.32.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
298
+ "model.layers.32.post_attention_layernorm.weight": "model-00010-of-00014.safetensors",
299
+ "model.layers.32.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
300
+ "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
301
+ "model.layers.32.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
302
+ "model.layers.32.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
303
+ "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
304
+ "model.layers.32.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
305
+ "model.layers.33.input_layernorm.weight": "model-00006-of-00014.safetensors",
306
+ "model.layers.33.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
307
+ "model.layers.33.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
308
+ "model.layers.33.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
309
+ "model.layers.33.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
310
+ "model.layers.33.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
311
+ "model.layers.33.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
312
+ "model.layers.33.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
313
+ "model.layers.33.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
314
+ "model.layers.33.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
315
+ "model.layers.33.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
316
+ "model.layers.34.input_layernorm.weight": "model-00005-of-00014.safetensors",
317
+ "model.layers.34.mlp.down_proj.weight": "model-00013-of-00014.safetensors",
318
+ "model.layers.34.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
319
+ "model.layers.34.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
320
+ "model.layers.34.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
321
+ "model.layers.34.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
322
+ "model.layers.34.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
323
+ "model.layers.34.self_attn.o_proj.weight": "model-00007-of-00014.safetensors",
324
+ "model.layers.34.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
325
+ "model.layers.34.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
326
+ "model.layers.34.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
327
+ "model.layers.35.input_layernorm.weight": "model-00003-of-00014.safetensors",
328
+ "model.layers.35.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
329
+ "model.layers.35.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
330
+ "model.layers.35.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
331
+ "model.layers.35.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
332
+ "model.layers.35.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
333
+ "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
334
+ "model.layers.35.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
335
+ "model.layers.35.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
336
+ "model.layers.35.self_attn.q_proj.weight": "model-00010-of-00014.safetensors",
337
+ "model.layers.35.self_attn.v_proj.weight": "model-00008-of-00014.safetensors",
338
+ "model.layers.36.input_layernorm.weight": "model-00007-of-00014.safetensors",
339
+ "model.layers.36.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
340
+ "model.layers.36.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
341
+ "model.layers.36.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
342
+ "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
343
+ "model.layers.36.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
344
+ "model.layers.36.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
345
+ "model.layers.36.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
346
+ "model.layers.36.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
347
+ "model.layers.36.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
348
+ "model.layers.36.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
349
+ "model.layers.37.input_layernorm.weight": "model-00013-of-00014.safetensors",
350
+ "model.layers.37.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
351
+ "model.layers.37.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
352
+ "model.layers.37.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
353
+ "model.layers.37.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
354
+ "model.layers.37.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
355
+ "model.layers.37.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
356
+ "model.layers.37.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
357
+ "model.layers.37.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
358
+ "model.layers.37.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
359
+ "model.layers.37.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
360
+ "model.layers.38.input_layernorm.weight": "model-00001-of-00014.safetensors",
361
+ "model.layers.38.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
362
+ "model.layers.38.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
363
+ "model.layers.38.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
364
+ "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
365
+ "model.layers.38.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
366
+ "model.layers.38.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
367
+ "model.layers.38.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
368
+ "model.layers.38.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
369
+ "model.layers.38.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
370
+ "model.layers.38.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
371
+ "model.layers.39.input_layernorm.weight": "model-00003-of-00014.safetensors",
372
+ "model.layers.39.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
373
+ "model.layers.39.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
374
+ "model.layers.39.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
375
+ "model.layers.39.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
376
+ "model.layers.39.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
377
+ "model.layers.39.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
378
+ "model.layers.39.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
379
+ "model.layers.39.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
380
+ "model.layers.39.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
381
+ "model.layers.39.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
382
+ "model.layers.4.input_layernorm.weight": "model-00014-of-00014.safetensors",
383
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
384
+ "model.layers.4.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
385
+ "model.layers.4.mlp.up_proj.weight": "model-00008-of-00014.safetensors",
386
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
387
+ "model.layers.4.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
388
+ "model.layers.4.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
389
+ "model.layers.4.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
390
+ "model.layers.4.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
391
+ "model.layers.4.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
392
+ "model.layers.4.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
393
+ "model.layers.40.input_layernorm.weight": "model-00007-of-00014.safetensors",
394
+ "model.layers.40.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
395
+ "model.layers.40.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
396
+ "model.layers.40.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
397
+ "model.layers.40.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
398
+ "model.layers.40.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
399
+ "model.layers.40.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
400
+ "model.layers.40.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
401
+ "model.layers.40.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
402
+ "model.layers.40.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
403
+ "model.layers.40.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
404
+ "model.layers.41.input_layernorm.weight": "model-00004-of-00014.safetensors",
405
+ "model.layers.41.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
406
+ "model.layers.41.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
407
+ "model.layers.41.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
408
+ "model.layers.41.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
409
+ "model.layers.41.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
410
+ "model.layers.41.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
411
+ "model.layers.41.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
412
+ "model.layers.41.self_attn.q_norm.weight": "model-00011-of-00014.safetensors",
413
+ "model.layers.41.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
414
+ "model.layers.41.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
415
+ "model.layers.42.input_layernorm.weight": "model-00008-of-00014.safetensors",
416
+ "model.layers.42.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
417
+ "model.layers.42.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
418
+ "model.layers.42.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
419
+ "model.layers.42.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
420
+ "model.layers.42.self_attn.k_norm.weight": "model-00006-of-00014.safetensors",
421
+ "model.layers.42.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
422
+ "model.layers.42.self_attn.o_proj.weight": "model-00006-of-00014.safetensors",
423
+ "model.layers.42.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
424
+ "model.layers.42.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
425
+ "model.layers.42.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
426
+ "model.layers.43.input_layernorm.weight": "model-00011-of-00014.safetensors",
427
+ "model.layers.43.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
428
+ "model.layers.43.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
429
+ "model.layers.43.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
430
+ "model.layers.43.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
431
+ "model.layers.43.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
432
+ "model.layers.43.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
433
+ "model.layers.43.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
434
+ "model.layers.43.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
435
+ "model.layers.43.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
436
+ "model.layers.43.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
437
+ "model.layers.44.input_layernorm.weight": "model-00010-of-00014.safetensors",
438
+ "model.layers.44.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
439
+ "model.layers.44.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
440
+ "model.layers.44.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
441
+ "model.layers.44.post_attention_layernorm.weight": "model-00011-of-00014.safetensors",
442
+ "model.layers.44.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
443
+ "model.layers.44.self_attn.k_proj.weight": "model-00007-of-00014.safetensors",
444
+ "model.layers.44.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
445
+ "model.layers.44.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
446
+ "model.layers.44.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
447
+ "model.layers.44.self_attn.v_proj.weight": "model-00011-of-00014.safetensors",
448
+ "model.layers.45.input_layernorm.weight": "model-00008-of-00014.safetensors",
449
+ "model.layers.45.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
450
+ "model.layers.45.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
451
+ "model.layers.45.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
452
+ "model.layers.45.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
453
+ "model.layers.45.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
454
+ "model.layers.45.self_attn.k_proj.weight": "model-00006-of-00014.safetensors",
455
+ "model.layers.45.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
456
+ "model.layers.45.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
457
+ "model.layers.45.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
458
+ "model.layers.45.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
459
+ "model.layers.46.input_layernorm.weight": "model-00013-of-00014.safetensors",
460
+ "model.layers.46.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
461
+ "model.layers.46.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
462
+ "model.layers.46.mlp.up_proj.weight": "model-00002-of-00014.safetensors",
463
+ "model.layers.46.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
464
+ "model.layers.46.self_attn.k_norm.weight": "model-00003-of-00014.safetensors",
465
+ "model.layers.46.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
466
+ "model.layers.46.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
467
+ "model.layers.46.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
468
+ "model.layers.46.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
469
+ "model.layers.46.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
470
+ "model.layers.47.input_layernorm.weight": "model-00007-of-00014.safetensors",
471
+ "model.layers.47.mlp.down_proj.weight": "model-00009-of-00014.safetensors",
472
+ "model.layers.47.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
473
+ "model.layers.47.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
474
+ "model.layers.47.post_attention_layernorm.weight": "model-00013-of-00014.safetensors",
475
+ "model.layers.47.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
476
+ "model.layers.47.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
477
+ "model.layers.47.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
478
+ "model.layers.47.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
479
+ "model.layers.47.self_attn.q_proj.weight": "model-00002-of-00014.safetensors",
480
+ "model.layers.47.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
481
+ "model.layers.48.input_layernorm.weight": "model-00003-of-00014.safetensors",
482
+ "model.layers.48.mlp.down_proj.weight": "model-00012-of-00014.safetensors",
483
+ "model.layers.48.mlp.gate_proj.weight": "model-00005-of-00014.safetensors",
484
+ "model.layers.48.mlp.up_proj.weight": "model-00005-of-00014.safetensors",
485
+ "model.layers.48.post_attention_layernorm.weight": "model-00006-of-00014.safetensors",
486
+ "model.layers.48.self_attn.k_norm.weight": "model-00012-of-00014.safetensors",
487
+ "model.layers.48.self_attn.k_proj.weight": "model-00014-of-00014.safetensors",
488
+ "model.layers.48.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
489
+ "model.layers.48.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
490
+ "model.layers.48.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
491
+ "model.layers.48.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
492
+ "model.layers.49.input_layernorm.weight": "model-00012-of-00014.safetensors",
493
+ "model.layers.49.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
494
+ "model.layers.49.mlp.gate_proj.weight": "model-00014-of-00014.safetensors",
495
+ "model.layers.49.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
496
+ "model.layers.49.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
497
+ "model.layers.49.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
498
+ "model.layers.49.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
499
+ "model.layers.49.self_attn.o_proj.weight": "model-00005-of-00014.safetensors",
500
+ "model.layers.49.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
501
+ "model.layers.49.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
502
+ "model.layers.49.self_attn.v_proj.weight": "model-00013-of-00014.safetensors",
503
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00014.safetensors",
504
+ "model.layers.5.mlp.down_proj.weight": "model-00010-of-00014.safetensors",
505
+ "model.layers.5.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
506
+ "model.layers.5.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
507
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
508
+ "model.layers.5.self_attn.k_norm.weight": "model-00008-of-00014.safetensors",
509
+ "model.layers.5.self_attn.k_proj.weight": "model-00014-of-00014.safetensors",
510
+ "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
511
+ "model.layers.5.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
512
+ "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
513
+ "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
514
+ "model.layers.50.input_layernorm.weight": "model-00012-of-00014.safetensors",
515
+ "model.layers.50.mlp.down_proj.weight": "model-00008-of-00014.safetensors",
516
+ "model.layers.50.mlp.gate_proj.weight": "model-00010-of-00014.safetensors",
517
+ "model.layers.50.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
518
+ "model.layers.50.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
519
+ "model.layers.50.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
520
+ "model.layers.50.self_attn.k_proj.weight": "model-00001-of-00014.safetensors",
521
+ "model.layers.50.self_attn.o_proj.weight": "model-00012-of-00014.safetensors",
522
+ "model.layers.50.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
523
+ "model.layers.50.self_attn.q_proj.weight": "model-00014-of-00014.safetensors",
524
+ "model.layers.50.self_attn.v_proj.weight": "model-00005-of-00014.safetensors",
525
+ "model.layers.51.input_layernorm.weight": "model-00013-of-00014.safetensors",
526
+ "model.layers.51.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
527
+ "model.layers.51.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
528
+ "model.layers.51.mlp.up_proj.weight": "model-00010-of-00014.safetensors",
529
+ "model.layers.51.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
530
+ "model.layers.51.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
531
+ "model.layers.51.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
532
+ "model.layers.51.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
533
+ "model.layers.51.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
534
+ "model.layers.51.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
535
+ "model.layers.51.self_attn.v_proj.weight": "model-00010-of-00014.safetensors",
536
+ "model.layers.52.input_layernorm.weight": "model-00013-of-00014.safetensors",
537
+ "model.layers.52.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
538
+ "model.layers.52.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
539
+ "model.layers.52.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
540
+ "model.layers.52.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
541
+ "model.layers.52.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
542
+ "model.layers.52.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
543
+ "model.layers.52.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
544
+ "model.layers.52.self_attn.q_norm.weight": "model-00005-of-00014.safetensors",
545
+ "model.layers.52.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
546
+ "model.layers.52.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
547
+ "model.layers.53.input_layernorm.weight": "model-00007-of-00014.safetensors",
548
+ "model.layers.53.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
549
+ "model.layers.53.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
550
+ "model.layers.53.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
551
+ "model.layers.53.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
552
+ "model.layers.53.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
553
+ "model.layers.53.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
554
+ "model.layers.53.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
555
+ "model.layers.53.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
556
+ "model.layers.53.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
557
+ "model.layers.53.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
558
+ "model.layers.54.input_layernorm.weight": "model-00003-of-00014.safetensors",
559
+ "model.layers.54.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
560
+ "model.layers.54.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
561
+ "model.layers.54.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
562
+ "model.layers.54.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
563
+ "model.layers.54.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
564
+ "model.layers.54.self_attn.k_proj.weight": "model-00002-of-00014.safetensors",
565
+ "model.layers.54.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
566
+ "model.layers.54.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
567
+ "model.layers.54.self_attn.q_proj.weight": "model-00004-of-00014.safetensors",
568
+ "model.layers.54.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
569
+ "model.layers.55.input_layernorm.weight": "model-00005-of-00014.safetensors",
570
+ "model.layers.55.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
571
+ "model.layers.55.mlp.gate_proj.weight": "model-00007-of-00014.safetensors",
572
+ "model.layers.55.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
573
+ "model.layers.55.post_attention_layernorm.weight": "model-00008-of-00014.safetensors",
574
+ "model.layers.55.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
575
+ "model.layers.55.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
576
+ "model.layers.55.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
577
+ "model.layers.55.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
578
+ "model.layers.55.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
579
+ "model.layers.55.self_attn.v_proj.weight": "model-00003-of-00014.safetensors",
580
+ "model.layers.56.input_layernorm.weight": "model-00003-of-00014.safetensors",
581
+ "model.layers.56.mlp.down_proj.weight": "model-00004-of-00014.safetensors",
582
+ "model.layers.56.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
583
+ "model.layers.56.mlp.up_proj.weight": "model-00006-of-00014.safetensors",
584
+ "model.layers.56.post_attention_layernorm.weight": "model-00005-of-00014.safetensors",
585
+ "model.layers.56.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
586
+ "model.layers.56.self_attn.k_proj.weight": "model-00013-of-00014.safetensors",
587
+ "model.layers.56.self_attn.o_proj.weight": "model-00011-of-00014.safetensors",
588
+ "model.layers.56.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
589
+ "model.layers.56.self_attn.q_proj.weight": "model-00006-of-00014.safetensors",
590
+ "model.layers.56.self_attn.v_proj.weight": "model-00007-of-00014.safetensors",
591
+ "model.layers.57.input_layernorm.weight": "model-00005-of-00014.safetensors",
592
+ "model.layers.57.mlp.down_proj.weight": "model-00014-of-00014.safetensors",
593
+ "model.layers.57.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
594
+ "model.layers.57.mlp.up_proj.weight": "model-00011-of-00014.safetensors",
595
+ "model.layers.57.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
596
+ "model.layers.57.self_attn.k_norm.weight": "model-00001-of-00014.safetensors",
597
+ "model.layers.57.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
598
+ "model.layers.57.self_attn.o_proj.weight": "model-00001-of-00014.safetensors",
599
+ "model.layers.57.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
600
+ "model.layers.57.self_attn.q_proj.weight": "model-00007-of-00014.safetensors",
601
+ "model.layers.57.self_attn.v_proj.weight": "model-00012-of-00014.safetensors",
602
+ "model.layers.58.input_layernorm.weight": "model-00011-of-00014.safetensors",
603
+ "model.layers.58.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
604
+ "model.layers.58.mlp.gate_proj.weight": "model-00004-of-00014.safetensors",
605
+ "model.layers.58.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
606
+ "model.layers.58.post_attention_layernorm.weight": "model-00009-of-00014.safetensors",
607
+ "model.layers.58.self_attn.k_norm.weight": "model-00004-of-00014.safetensors",
608
+ "model.layers.58.self_attn.k_proj.weight": "model-00009-of-00014.safetensors",
609
+ "model.layers.58.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
610
+ "model.layers.58.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
611
+ "model.layers.58.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
612
+ "model.layers.58.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
613
+ "model.layers.59.input_layernorm.weight": "model-00013-of-00014.safetensors",
614
+ "model.layers.59.mlp.down_proj.weight": "model-00003-of-00014.safetensors",
615
+ "model.layers.59.mlp.gate_proj.weight": "model-00008-of-00014.safetensors",
616
+ "model.layers.59.mlp.up_proj.weight": "model-00007-of-00014.safetensors",
617
+ "model.layers.59.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
618
+ "model.layers.59.self_attn.k_norm.weight": "model-00010-of-00014.safetensors",
619
+ "model.layers.59.self_attn.k_proj.weight": "model-00011-of-00014.safetensors",
620
+ "model.layers.59.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
621
+ "model.layers.59.self_attn.q_norm.weight": "model-00007-of-00014.safetensors",
622
+ "model.layers.59.self_attn.q_proj.weight": "model-00012-of-00014.safetensors",
623
+ "model.layers.59.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
624
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00014.safetensors",
625
+ "model.layers.6.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
626
+ "model.layers.6.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
627
+ "model.layers.6.mlp.up_proj.weight": "model-00012-of-00014.safetensors",
628
+ "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
629
+ "model.layers.6.self_attn.k_norm.weight": "model-00002-of-00014.safetensors",
630
+ "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
631
+ "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00014.safetensors",
632
+ "model.layers.6.self_attn.q_norm.weight": "model-00008-of-00014.safetensors",
633
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00014.safetensors",
634
+ "model.layers.6.self_attn.v_proj.weight": "model-00014-of-00014.safetensors",
635
+ "model.layers.60.input_layernorm.weight": "model-00007-of-00014.safetensors",
636
+ "model.layers.60.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
637
+ "model.layers.60.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
638
+ "model.layers.60.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
639
+ "model.layers.60.post_attention_layernorm.weight": "model-00003-of-00014.safetensors",
640
+ "model.layers.60.self_attn.k_norm.weight": "model-00007-of-00014.safetensors",
641
+ "model.layers.60.self_attn.k_proj.weight": "model-00012-of-00014.safetensors",
642
+ "model.layers.60.self_attn.o_proj.weight": "model-00009-of-00014.safetensors",
643
+ "model.layers.60.self_attn.q_norm.weight": "model-00003-of-00014.safetensors",
644
+ "model.layers.60.self_attn.q_proj.weight": "model-00009-of-00014.safetensors",
645
+ "model.layers.60.self_attn.v_proj.weight": "model-00002-of-00014.safetensors",
646
+ "model.layers.61.input_layernorm.weight": "model-00002-of-00014.safetensors",
647
+ "model.layers.61.mlp.down_proj.weight": "model-00006-of-00014.safetensors",
648
+ "model.layers.61.mlp.gate_proj.weight": "model-00001-of-00014.safetensors",
649
+ "model.layers.61.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
650
+ "model.layers.61.post_attention_layernorm.weight": "model-00007-of-00014.safetensors",
651
+ "model.layers.61.self_attn.k_norm.weight": "model-00011-of-00014.safetensors",
652
+ "model.layers.61.self_attn.k_proj.weight": "model-00003-of-00014.safetensors",
653
+ "model.layers.61.self_attn.o_proj.weight": "model-00008-of-00014.safetensors",
654
+ "model.layers.61.self_attn.q_norm.weight": "model-00006-of-00014.safetensors",
655
+ "model.layers.61.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
656
+ "model.layers.61.self_attn.v_proj.weight": "model-00001-of-00014.safetensors",
657
+ "model.layers.62.input_layernorm.weight": "model-00006-of-00014.safetensors",
658
+ "model.layers.62.mlp.down_proj.weight": "model-00001-of-00014.safetensors",
659
+ "model.layers.62.mlp.gate_proj.weight": "model-00006-of-00014.safetensors",
660
+ "model.layers.62.mlp.up_proj.weight": "model-00003-of-00014.safetensors",
661
+ "model.layers.62.post_attention_layernorm.weight": "model-00002-of-00014.safetensors",
662
+ "model.layers.62.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
663
+ "model.layers.62.self_attn.k_proj.weight": "model-00008-of-00014.safetensors",
664
+ "model.layers.62.self_attn.o_proj.weight": "model-00013-of-00014.safetensors",
665
+ "model.layers.62.self_attn.q_norm.weight": "model-00009-of-00014.safetensors",
666
+ "model.layers.62.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
667
+ "model.layers.62.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
668
+ "model.layers.63.input_layernorm.weight": "model-00012-of-00014.safetensors",
669
+ "model.layers.63.mlp.down_proj.weight": "model-00002-of-00014.safetensors",
670
+ "model.layers.63.mlp.gate_proj.weight": "model-00002-of-00014.safetensors",
671
+ "model.layers.63.mlp.up_proj.weight": "model-00001-of-00014.safetensors",
672
+ "model.layers.63.post_attention_layernorm.weight": "model-00004-of-00014.safetensors",
673
+ "model.layers.63.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
674
+ "model.layers.63.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
675
+ "model.layers.63.self_attn.o_proj.weight": "model-00014-of-00014.safetensors",
676
+ "model.layers.63.self_attn.q_norm.weight": "model-00012-of-00014.safetensors",
677
+ "model.layers.63.self_attn.q_proj.weight": "model-00008-of-00014.safetensors",
678
+ "model.layers.63.self_attn.v_proj.weight": "model-00004-of-00014.safetensors",
679
+ "model.layers.7.input_layernorm.weight": "model-00008-of-00014.safetensors",
680
+ "model.layers.7.mlp.down_proj.weight": "model-00011-of-00014.safetensors",
681
+ "model.layers.7.mlp.gate_proj.weight": "model-00011-of-00014.safetensors",
682
+ "model.layers.7.mlp.up_proj.weight": "model-00014-of-00014.safetensors",
683
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00014.safetensors",
684
+ "model.layers.7.self_attn.k_norm.weight": "model-00005-of-00014.safetensors",
685
+ "model.layers.7.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
686
+ "model.layers.7.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
687
+ "model.layers.7.self_attn.q_norm.weight": "model-00013-of-00014.safetensors",
688
+ "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00014.safetensors",
689
+ "model.layers.7.self_attn.v_proj.weight": "model-00009-of-00014.safetensors",
690
+ "model.layers.8.input_layernorm.weight": "model-00012-of-00014.safetensors",
691
+ "model.layers.8.mlp.down_proj.weight": "model-00007-of-00014.safetensors",
692
+ "model.layers.8.mlp.gate_proj.weight": "model-00009-of-00014.safetensors",
693
+ "model.layers.8.mlp.up_proj.weight": "model-00004-of-00014.safetensors",
694
+ "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00014.safetensors",
695
+ "model.layers.8.self_attn.k_norm.weight": "model-00009-of-00014.safetensors",
696
+ "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00014.safetensors",
697
+ "model.layers.8.self_attn.o_proj.weight": "model-00010-of-00014.safetensors",
698
+ "model.layers.8.self_attn.q_norm.weight": "model-00014-of-00014.safetensors",
699
+ "model.layers.8.self_attn.q_proj.weight": "model-00005-of-00014.safetensors",
700
+ "model.layers.8.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
701
+ "model.layers.9.input_layernorm.weight": "model-00007-of-00014.safetensors",
702
+ "model.layers.9.mlp.down_proj.weight": "model-00005-of-00014.safetensors",
703
+ "model.layers.9.mlp.gate_proj.weight": "model-00013-of-00014.safetensors",
704
+ "model.layers.9.mlp.up_proj.weight": "model-00013-of-00014.safetensors",
705
+ "model.layers.9.post_attention_layernorm.weight": "model-00012-of-00014.safetensors",
706
+ "model.layers.9.self_attn.k_norm.weight": "model-00013-of-00014.safetensors",
707
+ "model.layers.9.self_attn.k_proj.weight": "model-00005-of-00014.safetensors",
708
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00014.safetensors",
709
+ "model.layers.9.self_attn.q_norm.weight": "model-00001-of-00014.safetensors",
710
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00014.safetensors",
711
+ "model.layers.9.self_attn.v_proj.weight": "model-00006-of-00014.safetensors",
712
+ "model.norm.weight": "model-00011-of-00014.safetensors"
713
+ }
714
+ }
hf/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
hf/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
hf/tokenizer_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null,
239
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in message.content %}\n {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}"
240
+ }
hf/tokenizer_config.json.bak ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
hf/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
hf_ip/lb_endpoint.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 10.45.17.59:8000
hf_ip/load_balancer.log ADDED
@@ -0,0 +1,1198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
2
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
3
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
4
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
5
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
6
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
7
+ 10.45.190.244 - "GET /v1/models HTTP/1.1" 200 -
8
+ 10.45.190.245 - "GET /v1/models HTTP/1.1" 200 -
9
+ 10.46.50.247 - "GET /v1/models HTTP/1.1" 200 -
10
+ 10.46.17.244 - "GET /v1/models HTTP/1.1" 200 -
11
+ 10.46.50.251 - "GET /v1/models HTTP/1.1" 200 -
12
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
13
+ 1010.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
14
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
15
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
16
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
17
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
18
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
19
+ 110.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
20
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1"10.45.110.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
21
+ 10.46.10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
22
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
23
+ 10.45.190.244 - "POST /v1/completions HTTP/10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
24
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
25
+ 10.45.190.10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
26
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
27
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
28
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
29
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
30
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
31
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
32
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1"10.46.510.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
33
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
34
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
35
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1"10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
36
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
37
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
38
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
39
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
40
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
41
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
42
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
43
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
44
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
45
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
46
+ 10.46.50.10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
47
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
48
+ 10.45.190.242 - "POST /v1/completions HTTP/10.46.17.2310.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
49
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
50
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
51
+ 10.46.17.236 - "POST /v1/completions HTTP10.46.17.23610.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
52
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
53
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
54
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
55
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
56
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
57
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
58
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
59
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
60
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
61
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
62
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
63
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
64
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 20010.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
65
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
66
+ 10.410.46.50.247 - "POST /v1/completions HTTP/1.1" 2010.410.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
67
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
68
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
69
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
70
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
71
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
72
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
73
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
74
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
75
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 20010.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
76
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
77
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
78
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
79
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
80
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
81
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
82
+ 1010.45.190.245 - "POST /v1/completions HTTP/1.1" 20010.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
83
+ 10.10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
84
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
85
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
86
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
87
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
88
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
89
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
90
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
91
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
92
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
93
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
94
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
95
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1"10.46.110.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
96
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
97
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
98
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 10.46.10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
99
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
100
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
101
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
102
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
103
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
104
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
105
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
106
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
107
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
108
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
109
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
110
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
111
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
112
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
113
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
114
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
115
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
116
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
117
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
118
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
119
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
120
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
121
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
122
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
123
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
124
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
125
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
126
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
127
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
128
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
129
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
130
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
131
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
132
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
133
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
134
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
135
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
136
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
137
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
138
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
139
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
140
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
141
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
142
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
143
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
144
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
145
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
146
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
147
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
148
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
149
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
150
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
151
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
152
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
153
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
154
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
155
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
156
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
157
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 2010.410.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
158
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
159
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
160
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
161
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
162
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
163
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
164
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
165
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
166
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
167
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
168
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
169
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
170
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
171
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
172
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
173
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
174
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
175
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
176
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
177
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
178
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
179
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
180
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
181
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
182
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
183
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
184
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
185
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 20010.10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
186
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
187
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
188
+ 110.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
189
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
190
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
191
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
192
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -110.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
193
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
194
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
195
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
196
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
197
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
198
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
199
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
200
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
201
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
202
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
203
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
204
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
205
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
206
+ 1010.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
207
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
208
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
209
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
210
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
211
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
212
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
213
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
214
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
215
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
216
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
217
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
218
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
219
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
220
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
221
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
222
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
223
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
224
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
225
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
226
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
227
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
228
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
229
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
230
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
231
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
232
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
233
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
234
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
235
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
236
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
237
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
238
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
239
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
240
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
241
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
242
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
243
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
244
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
245
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
246
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
247
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
248
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
249
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
250
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
251
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
252
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
253
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
254
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
255
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
256
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
257
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
258
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
259
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
260
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
261
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.4610.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
262
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
263
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
264
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
265
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
266
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
267
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
268
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
269
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
270
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
271
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 1010.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
272
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
273
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
274
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
275
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
276
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
277
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
278
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
279
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
280
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
281
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
282
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
283
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
284
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
285
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
286
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
287
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
288
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
289
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
290
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
291
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 1010.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
292
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
293
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
294
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
295
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
296
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
297
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
298
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
299
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
300
+ 1010.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
301
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
302
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
303
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
304
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 20010.10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
305
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
306
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
307
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
308
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
309
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
310
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
311
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
312
+ 110.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
313
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
314
+ 10.10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
315
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
316
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 210.4610.46.17.240 - "POST /v1/completions HTTP/1.1" 210.4610.45.190.245 - "POST /v1/completions HTTP/1.1" 10.46.10.46.50.247 - "POST /v1/completions HTTP/1.1" 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
317
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
318
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
319
+ 10.4510.45.190.244 - "POST /v1/completions HTTP/1.1" 210.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
320
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
321
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
322
+ 10.4610.46.50.247 - "POST /v1/completions HTTP/1.1" 210.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
323
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
324
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
325
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
326
+ 10.410.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
327
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
328
+ 10.410.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
329
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
330
+ 10.4510.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
331
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
332
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
333
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
334
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
335
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
336
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
337
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
338
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
339
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
340
+ 10.410.46.17.236 - "POST /v1/completions HTTP/1.1" 2010.410.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
341
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
342
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
343
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
344
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
345
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
346
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
347
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
348
+ 10.410.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
349
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
350
+ 10.10.46.50.251 - "POST /v1/completions HTTP/1.1" 20010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
351
+ 10.10.46.50.203 - "POST /v1/completions HTTP/1.1" 20010.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
352
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
353
+ 10.10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
354
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
355
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
356
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
357
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
358
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
359
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
360
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
361
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
362
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
363
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
364
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
365
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
366
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
367
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
368
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
369
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -110.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
370
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
371
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
372
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
373
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 20010.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
374
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
375
+ 1010.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
376
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
377
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
378
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
379
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 2010.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
380
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
381
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
382
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
383
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
384
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
385
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
386
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
387
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
388
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
389
+ 1010.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
390
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
391
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
392
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
393
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
394
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
395
+ 10.10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
396
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
397
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
398
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
399
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
400
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
401
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
402
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
403
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
404
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
405
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
406
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
407
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
408
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
409
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
410
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
411
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
412
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
413
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
414
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
415
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
416
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
417
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
418
+ 110.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
419
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
420
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
421
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
422
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
423
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
424
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
425
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
426
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
427
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
428
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
429
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
430
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
431
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
432
+ 110.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
433
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
434
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
435
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
436
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
437
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
438
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
439
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
440
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
441
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
442
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
443
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
444
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
445
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
446
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
447
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
448
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
449
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 20010.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
450
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
451
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
452
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
453
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
454
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
455
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
456
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
457
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
458
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
459
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
460
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
461
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
462
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
463
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
464
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
465
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
466
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
467
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
468
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
469
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
470
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
471
+ 10.10.45.190.244 - "POST /v1/completions HTTP/1.1" 20010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
472
+ 10.10.46.17.2410.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
473
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
474
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
475
+ 10.46.50.251 - "POST /v1/completions HTTP10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
476
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
477
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
478
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
479
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
480
+ 10.46.17.236 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
481
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
482
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
483
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
484
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
485
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
486
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
487
+ 10.46.17.236 - "POST /v1/completions HTTP/10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
488
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
489
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
490
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
491
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
492
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
493
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
494
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
495
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
496
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
497
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
498
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
499
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
500
+ 10.45.190.2410.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
501
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
502
+ 10.46.17.236 - "POST /v1/completions HTTP/10.46.17.2310.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
503
+ 10.46.17.244 - "POST /v1/completions HTTP10.46.50.25110.46.17.236 - "POST /v1/completions HTTP10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
504
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
505
+ 10.46.50.203 10.46.17.240 - "POST /v1/completions HTT10.45.190.24510.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
506
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
507
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
508
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
509
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
510
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
511
+ 10.45.190.244 - "POST /v1/completions 10.45.190.245 - 10.46.17.244 - "POST /v1/completions H10.46.50.251 - 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
512
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
513
+ 10.46.17.240 - "POST /v1/completions H10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
514
+ 10.46.17.236 -10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
515
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
516
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
517
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
518
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
519
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
520
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
521
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
522
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
523
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
524
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
525
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
526
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
527
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
528
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
529
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
530
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
531
+ 10.45.190.244 - "POST /v1/completi10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
532
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
533
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
534
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
535
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
536
+ 10.45.190.242 - "PO10.46.50.247 - "POST /v1/completion10.45.190.245 - "P10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
537
+ 10.46.50.203 - "POST /v1/completions HTTP/110.46.50.210.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
538
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
539
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
540
+ 10.45.190.244 - "POST /v1/completions HTTP/10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
541
+ 10.46.17.23610.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
542
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
543
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
544
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
545
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
546
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
547
+ 10.46.17.236 - "POST /v1/completions 10.10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
548
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
549
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
550
+ 10.10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
551
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
552
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
553
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
554
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
555
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
556
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
557
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
558
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
559
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
560
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
561
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
562
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
563
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
564
+ 10.46.50.203 - "10.46.17.236 - "POST /v1/completions 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
565
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
566
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
567
+ 10.45.190.245 -10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
568
+ 10.46.50.251 - "POST /v1/completions HT10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
569
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
570
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
571
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
572
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
573
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
574
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
575
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
576
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
577
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
578
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
579
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
580
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
581
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
582
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
583
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
584
+ 10.410.46.17.236 - "POST /v1/completions HTTP/1.1" 2010.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
585
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
586
+ 10.10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
587
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
588
+ 10.45.190.210.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
589
+ 10.45.190.242 - "POST /v1/completions HTTP/110.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
590
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
591
+ 10.45.190.210.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
592
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
593
+ 10.46.17.244 - "POST /v1/completions HT10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
594
+ 10.45.190.245 10.46.17.240 - "POST /v1/completions HTT10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
595
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
596
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
597
+ 10.45.190.24210.46.50.251 - "POST /v1/completions HTTP/1.10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
598
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
599
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
600
+ 10.46.17.2410.46.50.203 - "POST /v1/completions HTTP/10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
601
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
602
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
603
+ 10.45.190.210.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
604
+ 10.46.17.236 - "POST /v1/completions HTTP/10.46.17.2410.45.190.244 - "POST /v1/completions HTTP10.410.46.1710.45.190.242 - "POST /v1/completions HTT10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
605
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
606
+ 10.410.46.50.251 - "POST /v1/completions HTTP/1.1" 2010.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
607
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
608
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
609
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
610
+ 10.4610.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
611
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
612
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 210.4610.45.190.242 - "POST /v1/completions HTTP/1.1" 10.45.10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
613
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
614
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
615
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
616
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
617
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
618
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
619
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
620
+ 10.46.10.45.190.244 - "POST /v1/completions HTTP/1.1"10.46.510.46.17.236 - "POST /v1/completions HTTP/1.1"10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
621
+ 10.46.110.46.17.236 - "POST /v1/completions HTTP/1.1"10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
622
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
623
+ 10.4610.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
624
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
625
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1"10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
626
+ 10.46.10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
627
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
628
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
629
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
630
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
631
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
632
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
633
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
634
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
635
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
636
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
637
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
638
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
639
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
640
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
641
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
642
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
643
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
644
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
645
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
646
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
647
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
648
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
649
+ 10.46.50.203 - "POST /v1/completions HTTP/110.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
650
+ 10.46.17.2410.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
651
+ 10.46.17.244 - "POST /v1/completions HTTP10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
652
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
653
+ 10.46.17.2410.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
654
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
655
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
656
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
657
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
658
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
659
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
660
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
661
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
662
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
663
+ 10.46.17.244 - "POST /v1/completions HTTP/1.10.46.17.10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
664
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
665
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
666
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
667
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
668
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
669
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
670
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
671
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
672
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
673
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
674
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
675
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
676
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
677
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
678
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
679
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
680
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
681
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
682
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
683
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
684
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
685
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
686
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
687
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
688
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
689
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
690
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
691
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
692
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
693
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
694
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
695
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
696
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
697
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
698
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
699
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
700
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
701
+ 10.46.17.240 - "POST /v1/completions HTTP/10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
702
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
703
+ 10.46.50.2510.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
704
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
705
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
706
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
707
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
708
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
709
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
710
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
711
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
712
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
713
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
714
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
715
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
716
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
717
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
718
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
719
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
720
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
721
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
722
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
723
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
724
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
725
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
726
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
727
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
728
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
729
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
730
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
731
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
732
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
733
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
734
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
735
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
736
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
737
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
738
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
739
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
740
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
741
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
742
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
743
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
744
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
745
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
746
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
747
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
748
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
749
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
750
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
751
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
752
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
753
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
754
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
755
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
756
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
757
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
758
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
759
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
760
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
761
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
762
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
763
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
764
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
765
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
766
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
767
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
768
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
769
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
770
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
771
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
772
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
773
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
774
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
775
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
776
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
777
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
778
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
779
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
780
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
781
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
782
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
783
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
784
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
785
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
786
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
787
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
788
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
789
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
790
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
791
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
792
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
793
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
794
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
795
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
796
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
797
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
798
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
799
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
800
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
801
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
802
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
803
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
804
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
805
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
806
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
807
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
808
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
809
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
810
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
811
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
812
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
813
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
814
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
815
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
816
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
817
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
818
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
819
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
820
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
821
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
822
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
823
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
824
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
825
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
826
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
827
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
828
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
829
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
830
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
831
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
832
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
833
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
834
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
835
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
836
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
837
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
838
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
839
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
840
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
841
+ 110.45.190.242 - "POST /v1/completions HTTP/1.1" 200 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
842
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
843
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
844
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
845
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
846
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
847
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
848
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
849
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
850
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
851
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
852
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
853
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
854
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
855
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
856
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
857
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
858
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
859
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
860
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
861
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
862
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
863
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
864
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
865
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
866
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
867
+ 10.4610.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
868
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 210.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
869
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
870
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
871
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
872
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
873
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
874
+ 10.10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
875
+ 1010.46.17.240 - "POST /v1/completions HTTP/1.1" 200 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
876
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
877
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
878
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
879
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
880
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
881
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
882
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
883
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
884
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
885
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
886
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
887
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
888
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
889
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
890
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
891
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
892
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
893
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
894
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
895
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
896
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
897
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
898
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
899
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
900
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
901
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
902
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
903
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
904
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
905
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
906
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
907
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
908
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
909
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
910
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
911
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
912
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
913
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
914
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
915
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
916
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
917
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
918
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
919
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
920
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
921
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
922
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
923
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
924
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
925
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
926
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
927
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
928
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
929
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
930
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
931
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
932
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
933
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
934
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
935
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
936
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
937
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
938
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
939
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
940
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
941
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
942
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
943
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
944
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
945
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
946
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
947
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
948
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
949
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
950
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
951
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
952
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
953
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
954
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
955
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
956
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
957
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
958
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
959
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
960
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
961
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
962
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
963
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
964
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
965
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
966
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
967
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
968
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
969
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
970
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
971
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
972
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
973
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
974
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
975
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
976
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
977
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
978
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
979
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
980
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
981
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
982
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
983
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
984
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
985
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
986
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
987
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
988
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
989
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
990
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
991
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
992
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
993
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
994
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
995
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
996
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
997
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
998
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
999
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1000
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1001
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1002
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1003
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1004
+ 10.46.17.210.410.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1005
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 210.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1006
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1007
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1008
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1009
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1010
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1011
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1012
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1013
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1014
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1015
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1016
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1"10.46.510.46.50.247 - "POST /v1/completions HTTP/1.1"10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1017
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1018
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1019
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1020
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1021
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1022
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1023
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1024
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1025
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1026
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1027
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1028
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1029
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1030
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1031
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1032
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1033
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1034
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1035
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1036
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1037
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1038
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1039
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1040
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1041
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1042
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1043
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1044
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1045
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1046
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1047
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1048
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1049
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1050
+ 10.4610.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1051
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1052
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1053
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1054
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1055
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1056
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1057
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1058
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1059
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1060
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1061
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1062
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1063
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1064
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1065
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1066
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1067
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1068
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1069
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1070
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1071
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1072
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1073
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1074
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1075
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1076
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1077
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1078
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1079
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1080
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1081
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1082
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1083
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1084
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1085
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1086
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1087
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1088
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1089
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1090
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1091
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1092
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1093
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1094
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1095
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1096
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1097
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1098
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1099
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1100
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1101
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1102
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1103
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1104
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1105
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1106
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1107
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1108
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1109
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1110
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1111
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1112
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1113
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1114
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1115
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1116
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1117
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1118
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1119
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1120
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1121
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1122
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1123
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1124
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1125
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1126
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1127
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1128
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1129
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1130
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1131
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1132
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1133
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1134
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1135
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1136
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1137
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1138
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1139
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1140
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1141
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1142
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1143
+ 1" 200 -
1144
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1145
+ 10.10.46.17.240 - "POST /v1/completions HTTP/1.1" 20010.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1146
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1147
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1148
+ -
1149
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1150
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1151
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1152
+ -
1153
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1154
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1155
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1156
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1157
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1158
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1159
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1160
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1161
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1162
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1163
+ v1/completions HTTP/1.1" 200 -
1164
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1165
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1166
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1167
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1168
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1169
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1170
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1171
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1172
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1173
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
1174
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1175
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1176
+ 1/completions HTTP/1.1" 200 -
1177
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1178
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1179
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1180
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1181
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1182
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1183
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1184
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1185
+ 03 - "POST /v1/completions HTTP/1.1" 200 -
1186
+ 10.45.190.244 - "POST /v1/completions HTTP/1.1" 200 -
1187
+ 10.46.50.247 - "POST /v1/completions HTTP/1.1" 200 -
1188
+
1189
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1190
+ 10.46.50.251 - "POST /v1/completions HTTP/1.1" 200 -
1191
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1192
+ 10.46.17.244 - "POST /v1/completions HTTP/1.1" 200 -
1193
+ 10.45.190.242 - "POST /v1/completions HTTP/1.1" 200 -
1194
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1195
+ 10.46.50.203 - "POST /v1/completions HTTP/1.1" 200 -
1196
+ 10.46.17.236 - "POST /v1/completions HTTP/1.1" 200 -
1197
+ 10.46.17.240 - "POST /v1/completions HTTP/1.1" 200 -
1198
+ 10.45.190.245 - "POST /v1/completions HTTP/1.1" 200 -
hf_ip/vllm_gpu0.log ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:24 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:26 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:26 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8001, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:33 [config.py:717] This model supports multiple tasks: {'generate', 'reward', 'embed', 'score', 'classify'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:33 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:13:38 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:13:41 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:13:41 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7fc89ba32170>
13
+ INFO 01-04 13:13:42 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:13:42 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:13:42 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:13:42 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 117.89 seconds
35
+ INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 118.156821 seconds
36
+ INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
37
+ INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.60 s
38
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
39
+ INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.42 s
40
+ INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 68.02 s in total
41
+ INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
42
+ INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
43
+ INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
44
+ INFO 01-04 13:18:32 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.40 seconds
45
+ INFO 01-04 13:18:32 [core_client.py:439] Core engine process 0 ready.
46
+ WARNING 01-04 13:18:32 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
47
+ INFO 01-04 13:18:32 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
48
+ INFO 01-04 13:18:32 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
49
+ INFO 01-04 13:18:32 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8001
50
+ INFO 01-04 13:18:32 [launcher.py:28] Available routes are:
51
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
52
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /docs, Methods: HEAD, GET
53
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
54
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
55
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /health, Methods: GET
56
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /load, Methods: GET
57
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /ping, Methods: GET, POST
58
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /tokenize, Methods: POST
59
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /detokenize, Methods: POST
60
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/models, Methods: GET
61
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /version, Methods: GET
62
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
63
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/completions, Methods: POST
64
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/embeddings, Methods: POST
65
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /pooling, Methods: POST
66
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /score, Methods: POST
67
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/score, Methods: POST
68
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
69
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /rerank, Methods: POST
70
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/rerank, Methods: POST
71
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v2/rerank, Methods: POST
72
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /invocations, Methods: POST
73
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /metrics, Methods: GET
74
+ INFO: Started server process [1315105]
75
+ INFO: Waiting for application startup.
76
+ INFO: Application startup comINFO 01INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:00:03 [loggers.py:111] Engine 000: Avg prompt throughput: 106.6 tokens/s, Avg generation throughput: 70.6 tokens/s, Running: 1 reqINFO 01-INFO 01-04 14:00:07 [loggers.py:111] Engine 000: Avg prINFO 01-04 INFO 01-04 14:00:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs,INFO 01-04 14:00:15 [loggers.py:111] Engine 000: Avg prompt tINFINFO 01-04 14:00:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04INFO 01-04 14:00:23 [loggers.py:111] Engine 000: Avg prompt throughput: 77.3 toINFO 01-04 14:00:25 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:00:26 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 INFO 01-04 14:00:33 [loggers.py:111] Engine 000: Avg prompt throughput: 64.9 tokINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
77
+ INFO 01-04 14:00:35 [loggers.py:111] Engine 000: Avg promptINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
78
+ INFO 01-0INFO 01-04 14:00:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO 01-04 14:00:45 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:00:46 [loggers.py:111] Engine 000: Avg prompt througINFO 01-INFO 01-04 14:00:53 [loggers.py:111] Engine 000: Avg prompt throughput: 72.0 tokens/s, Avg generation throughput: 74.1 tokens/s, Running: 2 reINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
79
+ ININFO 01INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1INFO 01-04 INFO 01-04 14:01:03 [loggers.py:111] Engine 000: Avg prompt throughput: 70.9 tokens/s, Avg generation throughput: 72.9 tokens/s, Running: 2 reqINFO 01-04 14:01:06 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
80
+ INFO 01-04 14INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
81
+ INFO 01-04 14:01:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0INFOINFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14INFO 01-04 14:01:23 [loggers.py:111] Engine 000: Avg prompt throughput: 81.3 tokens/s, INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO 01-04 14:01:27 [loggers.py:111] Engine 000: Avg prompt throughput: 81.7 INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:01:33 [loggers.py:111] Engine 000: Avg prompt throughput: 67INFO 0INFO 01-04 14:01:37 [loggers.py:111] Engine 000: Avg prompt throughput: 110.1 tokens/s, Avg generation throughput: 106.4 tokensINFO 01-04 INFO 01-04 14:01:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 rINFO 01INFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 111.8 toINFO: 10.46INFO 01-04 14:01:53 [loggers.py:111] Engine 000: Avg prompt throughput: 72.8 tokeINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 20INFO INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14INFO 01-04 14:02:03 [loggers.py:111] Engine 000: Avg prompt throughput: 123.4 tokens/s, Avg generation throughput: 62.8 tokens/s, Running: 2 reqs, INFOINFO 01-04 14:02:05 [loggers.py:111] Engine 000: Avg promINFINFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:0INFO 01-04 14:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 55.0 tokens/s, Avg generation throughput: 87.8 tokens/s, Running: 2 reININFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:INFO 01-04 14:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 80.3 tokens/sINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1"INFO: INFO 01-04 14:02:25 [loggers.py:111] Engine 000: Avg pINFO 01-04 1INFO: 10.45.190.192:0 - "POST /v1/completions HINFO 01-04 14:02INFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:02:35 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:02INFO 01-04 14:02:43 [loggers.py:111] Engine 000: Avg prompt throughput: 143.7 tokens/s, Avg generation throughput: 77.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache IINFO 01-04 14:0INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
82
+ INFO 01-04 14:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 87.8 tokens/s, Avg generation throughput: 97.0 tokens/s, Running: 2 INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
83
+ INFO 01-04 14:INFO 01-04 14:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 110.9 tokens/s, Avg generation throughput: 121.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
84
+ INFO:INFO 01-04 14:03:07 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:03:09 [INFO 01-04 14:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 51.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix caINFO: 10.45.190INFO 01-04 14:03:23 [loggers.py:111] Engine 000: Avg prompt throughput: 241.7 tokens/s, Avg generation throughput: 87.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hitINFO 01-04 1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
86
+ INFO 01-04 14:03:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
87
+ INFO 01-04 14:03:37 [loggers.py:111] Engine 000: Avg prompt throughput: 126.7 tokens/s, Avg generation throughput: 82.6 tokINFO 01-04 14:03:39 INFO 01-04 14:03:43 [loggers.py:111] Engine 000: Avg prompt throughput: 139INFO 01-04 14:03:47 [loggers.py:111] Engine 000: Avg prompt throughput: 123.8 tokens/s, Avg generation throughput: 99.1 tokens/s, Running: 3 reqINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
88
+ INFO 01-04 14:03:53 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
89
+ INFO 01-04 14:03:57 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 1INFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg prompt throughput: 79.5 tokens/s, Avg generation throughput: 62.8 tokens/s, Running:INFO 01-04 14:04:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.4 tokens/s, Running: 2 reqs, WaINFO 01-04 14:04:13 [loggers.py:111] Engine 000: Avg prompt throughput: 134.8 tokens/s, Avg generation throughput: 77.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cachINFO: 10.46.17.192:0 - "POST /v1/completINFO 01-04 1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
90
+ INFO 01-04 14:04:19 [loggers.INFO 01-04 14:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 18.0 tokens/s, RunniINFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:04:29 [loggersINFO 01-04 14:04:33 [loggers.py:111] Engine 000: Avg prompt throughput: 193.7 tokens/s, Avg generation throughput: 40.1 tokens/s, RunnINFO 01-04 14:04:37 [loggers.py:111] Engine 000: Avg prompt throughput: 141.9 tokens/s, Avg generation throughput: 85.0 tokINFO 01-04 14:04:39 [loggINFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
91
+ INFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg promINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
92
+ INFO 01-04 14:04:49 [loggeINFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throughput: 188.0 tokens/s, Avg generation throughput: 51.7 tokens/s, RuINFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg prompt throughput: 202.9 tokens/sINFO 01-04 14:05:03 [loggers.py:111] Engine 000: Avg prompt throughput: 118.1 tokens/s, Avg generation throughput: 72.5 tokens/s, RuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
93
+ INFO: 10.46.17.1INFO: 10.46.17.192:0 - "POST /v1/completioINFO 01-04 14:05:07 INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO 01-04 14:05:13 [loggers.py:111] Engine 000: Avg prompt throughput: 208.7 tokens/s, Avg generation throughput: 77.8 tokens/s, RunnINFO 01-04 14:05:17 [loggers.py:111] Engine 000: Avg prompt throughput: 178.8 tokensINFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompt throughput: 155.6 tokens/s, Avg generation throughput: 74.9 tokens/s, RunnINFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 175.9 tokensINFO 01-04 14:05:33 [loggers.py:111] Engine 000: Avg prompt throughput: 164.6 tokens/s, Avg generation throughput: 105.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 6.4%
95
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
96
+ INFO: 10.45.190.192:0 - "POST /v1/completioINFO 01-04 14:05:37 INFO 01-04 14:05:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 76.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cINFO 01-04 14:05:47 [loggers.py:111] Engine 000: Avg prompt throughput: 165.1 tokensINFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt throughput: 332.1 tokens/s, Avg generation throughput: 80.2 tokens/s, RunnINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
97
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
98
+ INFO 01-04 14:05:5INFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 195.5 tokens/s, Avg generation throughput: 69.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cacINFO 01-04 14:06:0INFO 01-04 14:06:13 [loggers.py:111] Engine 000: Avg prompt throughput: 132.6 tokens/s, Avg generation throughput: 120.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix caINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
99
+ INFO 01-04 14:06:1INFO 01-04 14:06:23 [loggers.py:111] Engine 000: Avg prompt throughput: 279.3 tokens/s, Avg generation throughput: 123.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 5.0%
100
+ INFO: 10.46.50.192:0 - "POST /v1/completiINFO 01-04 14:06:27 [INFO 01-04 14:06:33 [loggers.py:111] Engine 000: Avg prompt throughput: 266.1 tokens/s, Avg generation throughput: 122.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 4.7%
101
+ INFO: 10.46.17.192:0 - "POST /v1/completiINFO 01-04 14:06:37 [INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
102
+ INFO 01-04 14:06:43 [loggers.py:111] Engine 000: Avg prompt throughput: 248.5 tokens/s, Avg generation throughput: 102.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, PrefixINFO: 10.45.190.19INFO 01-04 14:06:53 [loggers.py:111] Engine 000: Avg prompt throughput: 365.8 tokens/s, Avg generation throughput: 112.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 7.4%
103
+ INFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 177.2 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 7.4%
104
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
106
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
107
+ INFO 01-04 14:07:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 88.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cacINFO: 10.43.30INFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput: 507.3 tokens/s, Avg generation throughput: 87.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 9.2%
108
+ INFO: 10.46.50.192:0 - "POST /v1/completINFO 01-04 14:07:27 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:07INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
109
+ INFO 01-04 14:07:33 [loggers.py:111] Engine 000: Avg prompt throughput: 154.9 tokens/s, Avg generation throughpINFO 01-04 14:07:36 [INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
110
+ INFO INFO 01-04 14:07:3INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
111
+ INFO 01-04 14:07:43 [loggers.py:111] Engine 000: Avg prompt throughput: 283.0 tokens/s, Avg generation throughINFO: 10.43.30INFO 0INFO 01-04 14:07:47 [loggers.py:111] Engine 000: Avg prompt throughput: 345.4 tokens/s, Avg generation throughput: 202.0 tokens/INFO 01-04 14:07:49 INFO 01-04 14:07:53 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:07:57 [loggers.py:111] Engine 000: Avg prompt throughput: 199.4 tokens/s, Avg generation throughput: 105.6 tokens/s, RunINFO: 10.46.50.1INFO 01-04 14:08:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, RunnINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg prompt throughput: 166.5 tokensINFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 218.1 tokens/s, Avg generation throughput: 84.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix caINFO 01-04 14:08:17 [loggers.py:111] Engine 000: Avg prompt throughput: 263.8 tokenINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
112
+ INFO 01-04 14:08:23 [loggers.py:111] Engine 000: Avg prompt throughput: 140.0 tokens/s, Avg generation throughput: 76.1 tokens/s, RunninINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO 01-04 14:08:2INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 342.5 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix caINFO 01-04 14:08:37 [loggers.py:111] Engine 000: Avg prompt throughput: 571.2 tokenINFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 226.9 tokens/s, Avg generation throughput: 77.9 tokens/s, RunniINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
114
+ INFO 01-04 14:08:47 [loggers.py:111] Engine 000: Avg prompt throughput: 284.4 tokenINFO 01-04 14:08:53 [loggers.py:111] Engine 000: Avg prompt throughput: 323.4 tokens/s, Avg generation throughput: 76.2 tokens/s, RunninINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
115
+ INFO: 10.45.INFO 01-04 14:09:03 [loggers.py:111] Engine 000: Avg prompt throughput: 289.2 tokens/s, Avg generation throughput: 117.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 7.6%
116
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
117
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
118
+ INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 358.6 tokens/s, Avg generation throughput: 94.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cachINFO: 10.45.1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
119
+ INFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 74.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hINFO: 10.4INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
120
+ INFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 354.6 tokens/s, Avg generation throughput: 71.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cacheINFO 01-04 14:09INFO 01-04 14:09:43 [loggers.py:111] Engine 000: Avg prompt throughput: 771.7 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 8.5%
121
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTINFO 01-04 14:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
122
+ INFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 99.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hiINFO: 10.INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
123
+ INFO 01-04 14:10:03 [loggers.py:111] Engine 000: Avg prompt throughput: 815.3 tokens/s, Avg generation throughput: 58.9 tokens/s, Running: INFO 01-04 14:10:07 [loggers.py:111] Engine 000: Avg prompt throughput: 316.7 toINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
124
+ INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: INFO 01-04 14:10:17 [loggers.py:111] Engine 000: Avg prompt throughput: 458.3 toINFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 378.2 tokens/s, Avg generation throughput: 39.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hINFO 01-04 14:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokensINFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 383.2 tokens/s, Avg generation throughput: 33.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cacINFO 01-04 14:10:37INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 237.6 tokens/s, Avg generation throughput: 80.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 11.1%
125
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
126
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
127
+ INFO 01-04 14:10:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.4 tokens/s, INFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO 01-04 14:11:03 [loggers.py:111] Engine 000: Avg prompt throughput: 126.0 tokens/s, Avg generation throughput: 18.9 tokens/INFO 01-04 14:11:07 [loggers.py:111] Engine 000: Avg prompt throughput: 424.9 tokens/s, Avg INFO 01-04 14:11:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, PrefINFO: 10.46.17.192:0 INFO 01-04 14:11:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 11.0%
128
+ INFO 01-04 14:11:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 11.0%
129
+ INFO: 10.43.30.3:0 - "POST /v1/comINFO 01-04 14:11:47 [loggers.py:111] Engine 000: Avg prompt throughput: 405.3 tokens/s, Avg generation throughput: 26.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 7.7%
130
+ INFO 01-04 14:11:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 7.7%
131
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
132
+ INFO 01-04 14:12:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 7.7%
133
+ INFO 01-04 14:12:17 [loggers.py:111] Engine 000: Avg prompt throughput: 295.1 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 7.5%
134
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
135
+ INFO 01-04 14:12:27 [loggers.py:111] Engine 000: Avg prompt throughput: 199.6 tokens/s, Avg generation throughput: 77.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 7.4%
136
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
137
+ INFO 01-04 14:12:3INFO 01-04 14:12:37 [loggers.py:111] Engine 000: Avg prompt throughput: 302.2 tokens/s, Avg generation throughput: 17.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cacINFO 01-04 14:12:4INFO 01-04 14:12:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cacINFO: 10.45.19INFO: 10.46.17.192:0 - "POST /v1/completions INFO 01-04 14:12:INFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KINFO: 10.46.17.192:0 - "POSTINFO 01-04 14:13:INFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hINFO: 10.4INFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 392.7 tokens/s, Avg generation throughput: 17.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 10.0%
138
+ INFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 464.7 tokens/s, Avg generation throughput: 45.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 9.6%
139
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
140
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
141
+ INFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 54.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 9.6%
142
+ INFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 818.8 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 9.1%
143
+ INFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 9.1%
144
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
145
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
146
+ INFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 359.1 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 8.8%
147
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
148
+ INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 212.3 tokens/s, Avg generation throughput: 66.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 9.0%
149
+ INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 337.4 tokens/s, Avg generation throughput: 81.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 8.8%
150
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
151
+ INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 347.8 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 10.4%
152
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 384.5 tokens/s, Avg generation throughput: 124.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 10.1%
154
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
155
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
156
+ INFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 886.1 tokens/s, Avg generation throughput: 61.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 10.8%
157
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
158
+ INFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 569.5 tokens/s, Avg generation throughput: 71.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 10.5%
159
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
160
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTINFO: 10.45INFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 459.1 tokens/s, Avg generation throughput: 54.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 10.2%
161
+ INFO: 10.45.190.192:0 - "POST /v1/completions HINFO: 10.46.INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 458.6 tokens/s, Avg generation throughput: 48.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 12.4%
162
+ INFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg prompt throughput: 309.6 tokens/s, Avg generation throughput: 63.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 12.2%
163
+ INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 525.6 tokens/s, Avg generation throughput: 119.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, Prefix cache hit rate: 11.9%
164
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
165
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
166
+ INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 97.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 11.9%
167
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
168
+ INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 435.0 tokens/s, Avg generation throughput: 81.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 11.6%
169
+ INFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 701.2 tokens/s, Avg generation throughput: 78.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 11.2%
170
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ INFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 127.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 11.2%
172
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
173
+ INFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 654.4 tokens/s, Avg generation throughput: 127.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 10.8%
174
+ INFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 675.2 tokens/s, Avg generation throughput: 106.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:16:47 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
175
+ INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1014.1 tokens/s, Avg generation throughput: 164.8 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.0%, Prefix cache hit rate: 11.7%
176
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
177
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
178
+ INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 146.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 11.7%
179
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
180
+ INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 316.3 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:17:17 [loggers.py:111] Engine 000: AvgINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
181
+ INFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 556.7 tokens/s, Avg generation throughput: 106.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV INFO: 10.46.17.192:0 - "POST /v1/completionsINFO: 10.45.19INFO 01-04 14:17:33 [loggers.py:111] Engine 000: INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 964.4 tokens/s, Avg generation throughput: 105.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KVINFO: 10.46.17.192:0 - "POST /v1/completions INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
182
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 483.4 tokens/s, Avg generation throughput: 120.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KVINFO: 10.43.30.3:0 - "POST /v1/completions HTINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughput: 410.0 tokens/s, Avg generation throughput: 106.2 tokens/s, Running: 3 reqs, Waiting: 0 reqINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
184
+ INFO 01-04 14:18:07 [loggers.py:111] Engine 000: Avg prompt throughput: 394.1 tokens/s, Avg generation throughput: 153.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.9%, Prefix cache hit rate: 13.4%
185
+ INFO: 10.45INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughput: 551.9 tokens/s, Avg generation throughput: 122INFO 01-04 14:18:17 [loggers.py:111] Engine 000: Avg prompt throughput: 587.7 tokens/s, Avg generationINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
186
+ INFO 01-04 14:18:23 [loggers.py:111] Engine 000: AINFO 01-04 14:18:27 [loggers.py:111] Engine 000: Avg prompt throughput: 543.7 tokens/s, Avg generation throughput: 129.9 tokens/s, Running: 3 reqs, Waiting: 0 rINFO 01-04INFO: 10.43.30.5:0 - "POST /v1/completions HTTINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
187
+ INFO 01-04 14:18:37 [loggers.py:111] Engine 000: Avg prompt throughput: 539.9 tokens/s, Avg generation throughput: 158.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.4%, Prefix cache hit rate: 15.1%
188
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
189
+ INFO 01-04 14:18:47 [loggers.py:111] Engine 000: Avg prompt throughput: 241.8 tokens/s, Avg generation throughput: 156.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 15.0%
190
+ INFO 01-04 14:18:57 [loggers.py:111] Engine 000: Avg prompt throughput: 315.6 tokens/s, Avg generation throughput: 144.8 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.4%, Prefix cache hit rate: 14.8%
191
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
192
+ INFO 01-04 14:19:07 [loggers.py:111] Engine 000: Avg prompt throughput: 613.2 tokens/s, Avg generation throughput: 160.6 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 14.7%
193
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
194
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
195
+ INFO 01-04 14:19:17 [loggers.py:111] Engine 000: Avg prompt throughput: 282.8 tokens/s, Avg generation throughput: 153.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 14.5%
196
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
197
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
198
+ INFO 01-04 14:19:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 110.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 14.5%
199
+ INFO 01-04 14:19:37 [loggers.py:111] Engine 000: Avg prompt throughput: 577.7 tokens/s, Avg generation throughput: 61.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 14.2%
200
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
201
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
202
+ INFO 01-04 14:19:47 [loggers.py:111] Engine 000: Avg prompt throughput: 498.2 tokens/s, Avg generation throughput: 83.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughput: 451.6 tokens/s, Avg generation throughput: 93.0 INFO 01-04 14:19:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1566.2 tokens/s, Avg generation throughput: 77.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
203
+ INFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg genINFO 01-04 14:20:05 [loggeINFO 01-04 14INFO 01-04 14:20:07 [loggers.py:111] Engine 000: Avg prompt throughput: 606.0 tokens/s, Avg generation throughput: 8.4 tokens/s, Running: 1 reqs, WaiINFO 01-04 14:20:13 [loggers.pyINFO 01-04 14:20:15 [logINFO 01-04 14:20:17 [loggers.py:111] Engine 000: Avg prompt throughput: 631.6 tokens/s, Avg generation throughput: 35.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO: 10.43.30.5:0 - "POST INFO: 10.46.50.192:0INFO: INFO 01-04 14:20:23 [loggers.py:1INFO 01-04 14:20:25 [logINFO 01-04 14:20:27 [loggers.py:111] Engine 000: Avg prompt throughput: 813.4 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reINFO 01-04 14:20:33 [loggers.py:111INFO: 10.45.190.192:INFO 01-04 14:20:37 [loggers.py:111] Engine 000: Avg prompt throughput: 934.1 tokens/s, Avg generation throughput: 59.4 tokens/s, Running: 2 reqs, Waiting: 0 reINFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throughput: 385.3 tokens/s, Avg generation throughput: 125.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GINFO 01-04 14:20:45 [loggINFO 01-04 14:20:47 [loggers.py:111] Engine 000: Avg prompt throughput: 499.6 tokens/s, Avg genINFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 348.3 tokens/s, Avg generation throughput: 135.1 tokINFO: 1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1INFO 01-04 INFO 01-04 14:20:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
204
+ INFO: 10.45.190.192:0 - "POSINFO: 10.43.30.3:0 - INFO 01-04 14:21:07 [loggers.py:111] Engine 000: Avg prompt throughput: 1571.0 tokens/s, Avg generation tINFO 01-04 14:21:08 [loggers.py:111] Engine 000: Avg prompt throughput: 1191.6 tokens/s, Avg generation throughput:INFO 01-04 14:21:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:21:18 [loggers.py:111] Engine 00INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
205
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
206
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
207
+ INFO 01-04 14:21:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1640.0 tokens/s, Avg generation thrINFO 01-04 14:21:28 [loggers.py:111] Engine 000: Avg promptIINFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:21:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throuINFO 01-04 14:21:38 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:21:43 [loggers.py:111] Engine 000: AvgINFO: INFO 01-04 14:21:47 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:21:47 [loggers.py:111] Engine 000: Avg prompt throughput: 430.7 tokens/s, Avg generatioINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
208
+ INFO 01-04 14:21:57 [loggers.py:111] Engine 000: Avg prompt throughput: 741.4 tokens/s, Avg generatioINFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:22:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.8 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 649.5 tokens/s, Avg generation throughput: 108.5 tokensINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:22:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 62.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 18.0%
209
+ INFO 01-04 14:22:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1334.7 tokens/s, Avg generation throughput: 87.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.9%, Prefix cache hit rate: 17.4%
210
+ INFO 01-04 14:22:37 [loggers.py:111] Engine 000: Avg prompt throughput: 639.9 tokens/s, Avg generation throughput: 131.4 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 12.0%, Prefix cache hit rate: 17.1%
211
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
212
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
213
+ INFO 01-04 14:22:47 [loggers.py:111] Engine 000: Avg prompt throughput: 607.4 tokens/s, Avg generation throughput: 130.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs,INFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:22:57 [loggers.py:111] Engine 000: Avg prompt throughput: 396.3 tokens/s, Avg generation throughput: 130.5 tokens/s, Running: 4 reqs, Waiting: 0 reINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200INFOINFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 611.2 tokens/s, Avg gINFO 01-04 14:23:05 [loggers.py:11INFO 01-04 14:23:07 [loggers.py:111] Engine 000: Avg prompt throughput: 356.6 tokens/s, Avg INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
214
+ INFO 01-04 14:23:13 [loggeINFO: 10.46.17.192:0 - "POST /INFO 01-04 14:23:17 [loggers.py:111] Engine 000: Avg prompt throughput: 621.3 tokens/s, Avg generation throughput: 58.7 tokens/s, RINFO 01-04 14:23:17 [loggersINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
215
+ INFO 01-04 14:23:23 [logINFO 01-04 14:23:25 [loggers.py:11INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
216
+ INFO 01-04 14:23:27 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:23:27 [loggerINFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 474.7 tokens/s, Avg generation throughput: 51.2 toINFO 01-04 14:23:37 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:23:37 [loggerINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
217
+ INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:23:45 [loggersINFO 01-04 14:23:47 [loggers.py:111] Engine 000: Avg prompt throughput: 733.8 tokens/s, Avg generatINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
218
+ INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 414.2 tokens/s, Avg INFO 01-04 14:23:55 [loggers.pINFO 01-04 14:23:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 587.7 tokens/s, Avg generation throughput: 73.8INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
219
+ INFO 01-04 14:24:07 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:24:07 [loggers.py:INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 759.1 tokens/s, Avg generation throughput: 64.INFO 01-04 14:24:17 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:24:17 [loggers.py:111] Engine 000: Avg prompt throughput: 502.5 tokens/s, Avg generaINFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg INFO 01-04 14:24:27 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:24:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, WaitinINFO: 1INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg ININFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
220
+ INFO 01-04 14:24:37 [loggers.py:111] Engine 000: Avg prompt throughput: 744.9 tokens/s, Avg generINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
221
+ INFO 01-04 14:24:43INFO 01-04 14:24:45 [loggers.py:111INFO 01-04 14:24:47 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:24:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:24:53 [loggers.py:111] Engine 000: AvgINFO 01-04 14:24:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:24:57 [loggers.py:111] Engine 000: Avg prompt throughput: 197.1 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, WaiINFO: 10INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/ININFO 01-04 14:25:07 [loggers.py:111] Engine 000: Avg prompt throughput: 767.7 tokens/s, Avg generation throughput: 83.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 15INFO 01-04 14:25:17 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
222
+ INFO 01-04 14:25:17 [loggers.py:111] Engine 000: Avg prompt throughput: 645.8 tokensINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
223
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
224
+ ININFO 01-04 14:25:27 [loggers.py:111] Engine 000: Avg prompt throughput: 849.6 tokens/s, Avg generation throughput: 60.4 tokens/s, Running: 2 reqs, Waiting: 0 reINFO 01-04 14:25:33 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:25:37 [loggers.py:111] Engine 000: Avg prompt throughput: 698.2 tokens/s, Avg generation throughput: 21.3 tokens/s, RunniINFO 01-04 14:25:37 [loggerINFO 01-04 14:25:43 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:25:47 [loggers.py:111] Engine 000: Avg prompt throughput: 555.0 tokens/s, Avg generation throughput: 66.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
225
+ INFO 01-04 14:25:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 118.0 tokenIINFO 01-04 14:25:57 [loggers.py:111] Engine 000: Avg prompt throughput: 761.9 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 2 reqs, Waiting: 0 rINFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:26:07 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
226
+ INFO 01-04 14:26:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
227
+ INFO 01-04 14:26:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:26:17 [loggers.py:111] Engine 000: Avg prompt throughput: 774.4 tokens/s, Avg generatINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:26:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, RuINFO 01-04 14:26:37 [loggers.py:111] Engine 000: Avg prompt throughput: 944.1 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
228
+ INFO 01-04 14:26:47 [loggers.py:111] Engine 000: Avg prompt throughput: 943.7 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 16.9%
229
+ INFO: 10.43.30.4:0 - "INFO: 10.45.190.192:0 - "POST /v1/INFO 01-04 14:26:57 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:26:57 [loggers.py:111] Engine 000: Avg prompt throughput: 552.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:27:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.0%
230
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
231
+ INFO 01-04 14:27:17 [loggers.py:111] Engine 000: Avg prompt throughput: 832.9 tokens/s, Avg generation throughput: 38.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 17.7%
232
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
233
+ INFO 01-04 14:27:27 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:27:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 56.5 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:27:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunnINFO 01-04 14:27:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 14:27:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1181.9 tokens/s, Avg generation throughput: 41.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 17.3%
234
+ INFO 01-04 14:28:07 [loggers.py:111] Engine 000: Avg prompt throughput: 797.1 tokens/s, Avg generation throughput: 52.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.1%, Prefix cache hit rate: 17.0%
235
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
236
+ INFO 01-04 14:28:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 87.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 17.0%
237
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
238
+ INFO 01-04 14:28:27 [loggers.py:111] Engine 000: Avg prompt throughput: 642.9 tokens/s, Avg generation throughput: 7.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 16.8%
239
+ INFO 01-04 14:28:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 16.8%
240
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
241
+ INFO 01-04 14:28:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
242
+ INFO 01-04 14:28:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
243
+ INFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 706.4 tokens/s, Avg generation throughput: 26.5 tokens/s, RunINFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 1099.8 tokens/INFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunninINFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
244
+ INFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 714.1 tokens/s, Avg generation throughput: 53.9 tokens/s, RunINFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 972.9 tokens/sINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
245
+ INFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prompt throughput: 1500.7 tokens/s, Avg generation throughput: 32.1 tokens/s, RuINFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
246
+ INFO 01-04 14:29:57 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:29:57 [loggers.py:111] Engine 000: Avg prompt throughput: 452.7 tokens/s, Avg generation throughput: 15.5 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:30:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 16.6%
247
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
248
+ INFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt throughput: 516.8 tokens/s, Avg generation throughput: 13.5 tokens/s, Running: 1 reqs, WaiINFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, WaINFO 01-04 14:30:47 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:30:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, WaitiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
249
+ INFO 01-04 14:30:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1098.1 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.0%, Prefix cache hit rate: 14.INFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 49.9 tokens/s, Running: 1 reqs, WaitingINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
250
+ INFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt throughput: 903.7 tokens/s, Avg generation throughput: 5.7 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:31:27 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:31:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, WaiINFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:31:45 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:31:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.4 tokens/s, RunniINFO 01-04 14:31:47 [loggers.py:INFO 01-04 14:31:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:31:57 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:31:57 [loggers.py:1INFO 01-04 14:32:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:32:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:32:15 [loggers.py:111] Engine 000: Avg prompt throughput: 819.8 tokens/s, Avg generation throughput: INFO 01-04 14:32:27 [loggers.py:111] Engine 000: Avg prompt throughput: 471.4 tokens/s, Avg generation throughput: 11.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 17.0%
251
+ INFO 01-04 14:32:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, RunningINFO 01-04 14:32:57 [loggers.py:111] Engine 000: Avg prompt throughput: 832.5 tokens/s, Avg generation throughput: 43.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 13.8%
252
+ INFO 01-04 14:33:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 13.8%
253
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
254
+ INFO 01-04 14:33:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.8%
255
+ INFO 01-04 14:33:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.8%
256
+ INFO 01-04 14:33:47 [loggers.py:111] Engine 000: Avg prompt throughput: 849.0 tokens/s, Avg generation throughput: 6.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 14.9%
257
+ INFO 01-04 14:33:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 14.9%
258
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
259
+ INFO 01-04 14:34:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokINFO 01-04 14:34:27 [loggers.py:111] Engine 000: Avg prompt throughput: 579.9 tokens/s, Avg generation throughput: 22.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.3%
260
+ INFO 01-04 14:34:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.3%
261
+ INFO: 10.46.INFO 01-04 14:34:45 [loggers.py:111] Engine 000: AINFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV INFO 01-04 14:34:55 [loggers.py:111] Engine 000:INFO 01-04 14:34:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1370.5 tokens/s, Avg generation throughput: 39.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:34:59 [loggers.py:111] EngineINFO 01-04 14:35:16 [loggers.py:111] Engine 000: Avg prompt throughput: 518.8 tokens/s, Avg generation throughput: 2.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 18.5%
262
+ INFO 01-04 14:35:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.5%
263
+ INFO: 10.46.50.192:0INFO 01-04 14:35:45 [loggers.py:111] Engine 000: INFO 01-04 14:36:47 [loggers.py:111] Engine 000: Avg prompt throughput: 1461.8 tokens/s, Avg generation throughput: 9.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache hit rate: 19.9%
264
+ INFO 01-04 14:36:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 19.9%
265
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
266
+ INFO 01-04 14:37:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.9%
267
+ INFO 01-04 14:37:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:37:29 [loggers.py:111] Engine 000: AvgINFO 01-04 14:38:37 [loggers.py:111] Engine 000: Avg prompt throughput: 1114.2 tokens/s, Avg generation throughput: 3.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 20.5%
268
+ INFO 01-04 14:38:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 20.5%
269
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
270
+ INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
271
+ INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
272
+ INFO 01-04 14:40:17 [loggers.py:111] Engine 000: Avg prompt throughput: 603.6 tokens/s, Avg generation throughput: 35.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 20.3%
273
+ INFO 01-04 14:40:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 20.3%
274
+ INFO 01-04 14:40:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 20.3%
275
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
276
+ INFO 01-04 14:40:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.3%
277
+ INFO 01-04 14:40:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20INFOINFO 01-04 14:41:57 [loggers.py:111] Engine 000: Avg prompt throughput: 634.9 tokens/s, Avg generation throughput: 44.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 21.0%
278
+ INFO 01-04 14:42:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 21.0%
279
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
280
+ INFO 01-04 14:42:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 21.0%
281
+ INFO 01-04 14:42:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO 01-04 14:42:37 [loggers.py:111] Engine 000: Avg prompt throughput: 1122.6 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 1 INFO 01-04 14:43:07 [loggers.py:111] Engine 000: Avg prompt throughput: 726.2 tokens/s, Avg generation throughput: 20.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 20INFO 01-04 14:43:46 [loggers.py:111] Engine 000: Avg prompt throughput: 639.0 tokens/s, Avg generation throughput: 34.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.5%
282
+ INFO 01-04 1INFO 01-04 14:44:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1187.5 tokens/s, Avg generation throughput: 40.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hINFO 01-04 14:4INFO 01-04 14:44:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, RINFO 01-04 14:44:37 [loggers.py:111] Engine 000: Avg prompt throughput: 700.7 tokens/s, Avg genINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
283
+ INFO 01-04 14:45:19 [loggers.py:111] Engine 000: Avg prompt throughput: 1233.9 tokens/s, Avg generation throughput: 37.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, PrefINFO 01-04 14:48:27 [loggers.py:111] Engine 000: Avg prompt throughput: 481.7 tokens/s, Avg generation throughput: 39.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 20.6%
284
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
285
+ INFO 01-04 14:48:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.6%
286
+ INFO 01-04 14:48:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.6%
287
+ INFO 01-04 15:04:17 [loggers.py:111] Engine 000: Avg prompt throughput: 497.6 tokens/s, Avg generation throughput: 19.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 20.5%
288
+ INFO 01-04 15:04:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 20.5%
289
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
290
+ INFO 01-04 15:04:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 42.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
291
+ INFO 01-04 15:04:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
292
+ 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 10.6%
293
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
294
+ INFO 01-04 15:01:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.6%
295
+ INFO 01-04 15:01:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.6%
296
+ ration throughput: 29.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
297
+ INFO 01-04 15:01:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
298
+ ning: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.5%
299
+ INFO 01-04 15:01:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.5%
300
+ : 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 14.7%
301
+ INFO 01-04 14:51:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 14.7%
302
+ INFO 01-04 14:51:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 14.7%
303
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
304
+ INFO 01-04 14:51:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.7%
305
+ IINFO 01-04 14:58:18 [loggers.py:111] Engine 000: Avg prompt throughput: 567.3 tokens/s, Avg generation throughput: 26.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.4%
306
+ INFO 01-04 14:58:28 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 16.4%
307
+ INFO 01-04 14:58:38 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 16.4%
308
+ INFO 01-04 14:58:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 16.4%
309
+ INFO 01-04 14:58:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 16.4%
310
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
311
+ INFO 01-04 14:59:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
312
+ INFO 01-04 14:59:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
hf_ip/vllm_gpu1.log ADDED
@@ -0,0 +1,387 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:27 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:29 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:29 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8002, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:36 [config.py:717] This model supports multiple tasks: {'score', 'classify', 'reward', 'embed', 'generate'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:37 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:13:41 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:13:44 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:13:44 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7fdcbc82a650>
13
+ INFO 01-04 13:13:46 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:13:46 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:13:46 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:13:46 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 114.09 seconds
35
+ INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 114.594255 seconds
36
+ INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
37
+ INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.52 s
38
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
39
+ INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.09 s
40
+ INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.61 s in total
41
+ INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
42
+ INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
43
+ INFO 01-04 13:18:34 [gpu_model_runner.py:1686] Graph capturing finished in 40 secs, took 1.21 GiB
44
+ INFO 01-04 13:18:34 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.87 seconds
45
+ INFO 01-04 13:18:34 [core_client.py:439] Core engine process 0 ready.
46
+ WARNING 01-04 13:18:34 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
47
+ INFO 01-04 13:18:34 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
48
+ INFO 01-04 13:18:34 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
49
+ INFO 01-04 13:18:34 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8002
50
+ INFO 01-04 13:18:34 [launcher.py:28] Available routes are:
51
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
52
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /docs, Methods: HEAD, GET
53
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
54
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
55
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /health, Methods: GET
56
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /load, Methods: GET
57
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /ping, Methods: POST, GET
58
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /tokenize, Methods: POST
59
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /detokenize, Methods: POST
60
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/models, Methods: GET
61
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /version, Methods: GET
62
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
63
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/completions, Methods: POST
64
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/embeddings, Methods: POST
65
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /pooling, Methods: POST
66
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /score, Methods: POST
67
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/score, Methods: POST
68
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
69
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /rerank, Methods: POST
70
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v1/rerank, Methods: POST
71
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /v2/rerank, Methods: POST
72
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /invocations, Methods: POST
73
+ INFO 01-04 13:18:34 [launcher.py:36] Route: /metrics, Methods: GET
74
+ INFO: Started server process [1315813]
75
+ INFO: Waiting for application startup.
76
+ INFO: Application startup compINFO 0INFO 01-04 13:59:57 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:00:00 [loggers.py:111] Engine 000: Avg prompt throughput: 44.1 tokens/s, Avg generation throughput: 37.1 tokens/s, Running: 1 reqs, Waiting: INFO 0INFO 01-04 14:00:07 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:00:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:00:15 [loggers.py:111] Engine 000: Avg prompt thINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
77
+ INFO 01-04 14:00:20 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:00:25 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:00:27 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:00:30 [loggers.py:111] Engine 000: Avg prompt throughput: 48.6 tokens/s, Avg gINFO: 10.46.17.192:0 - "GET /v1/models HTTP/1.1" 200 OK
78
+ INFO: 1INFO 01-04 14:00:37 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:00:40 [loggers.py:111] Engine 000: Avg prompt throughput: 64.6 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 1.0%
79
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
80
+ INFO 01-04 14:00:50 [loggers.py:111] Engine 000: Avg prompt throughput: 60.4 tokens/s, Avg generation throughput: 97.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 0.7%
81
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
82
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
83
+ INFO 01-04 14:01:00 [loggers.py:111] Engine 000: Avg prompt throughput: 82.0 tokens/s, Avg generation throughput: 97.5 tokens/s, Running: 1 reqs, WaitINFO: INFO 01-04 14:01:07 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:01:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 1.1%IINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
84
+ INFO 01-04 14:01:20 [loggers.py:111] Engine 000: Avg prompt throughput: 85.1 tokens/s, Avg generation throughput: 75.3 tokens/s, Running: 1 reqs, WaitiINFINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-INFO 01-04 14:01:27 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:01:30 [loggers.py:111] Engine 000: Avg prompt throughput: 96.6 tokens/s, INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
86
+ INFO 01-04 14:01:40 [loggers.py:111] Engine 000: Avg prompt throughput: 66.1 tokens/s, Avg generation throughput: 84.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 0.6%
87
+ ININFO: INFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg pINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
88
+ INFO 01-04 14:01:50 [loggers.py:111] Engine 000: Avg prompt throughput: 96.2 tokens/s, Avg generINFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
89
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:02:00 [loggers.pyINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" INFO 01INFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:02:10 [loggers.py:111] Engine 000: Avg prompt throughput: 96.3 tokens/s, Avg genINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
90
+ INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
91
+ INFO 01-04 14:02:20 [loINFINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput: 96.0 tokens/s, Avg generation throughput: 86.9 tokens/sINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:02:30 [loggeINFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt throughput: 113.8 tokens/s, Avg generation throughput: 138.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, PINFO 01-04 14:02:40 [loggerINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
92
+ INFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughput: 133.4 tokens/s, Avg generation throughput: 181.1 tokeINFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughput: 62.3 tokens/s, Avg genINFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:02:50 INFO: INFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 107.0 tokeINFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:03:00 [loggers.py:111] Engine 000: Avg prompt throughput: 138.3 tokens/s, INFO 0INFO 01-04 14:03:07 [loggers.py:111] Engine 000: Avg promptINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OINFO 01-04 14:03:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:03:15 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:03:17 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:03:20 [loggers.py:111] Engine 000: Avg prompt throughput: 137.2 tokens/s, Avg generation throughput: 59.5 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:03:25 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:03:30 [loggers.py:111] Engine 000: Avg prompt throughput: 154.1 tokens/s, Avg generation throughput: 88.1 tokens/s, Running: 2 reqs, Waiting: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTPINFO 01-04 14:03:40 [loggers.py:111] Engine 000: Avg prompt throughput: 260.7 tokens/s, Avg generation throughput: 95.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 5.4%
93
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO 01-04 14:03:50 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 129.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 5.4%
95
+ ININFO: INFO 01-04 14:03:57 [loggers.py:111] Engine 000: AvgINFO 01-04 14:04:00 [loggers.py:111] Engine 000: Avg prompt throughput: 155.5 tokens/s, Avg generation throughput: 70.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/INFO: 10INFO 01-04 14:04:07 [loggers.py:111] Engine 000: AvgINFO 01-04 14:04:10 [loggers.py:111] Engine 000: Avg prompt throughput: 111.4 tokens/s, Avg generation throughput: 88.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO: INFINFO 01-04 14:04:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:04:17 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:04:20 [loggers.py:111] Engine 000: Avg prompt throughput: 159.6 tokens/s, Avg generatiINFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg prompt throughput: 154.5 tokens/s, Avg generation throughput: 78.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:04:30 [loggers.py:11INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
96
+ INFO 01-04 14:04:37 [loggers.py:111] Engine 000: Avg prompt throughput: 169.7 tokens/s, Avg generation throughput: 116.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:04:40 [loggers.py:11INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/INFO 01-04 1INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
97
+ INFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:04:50 [loggers.py:111] Engine 000: Avg prompt throughput: 107.0 tokens/s,INFO 01-04INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OINFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:05:00 [loggers.py:111] Engine 000: Avg prompt throughput: 314.6 tokens/s,INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
98
+ INFO 01-04 INFO 01-04 14:05:07 [loggers.py:111] Engine 000: Avg prompt throughput: 336.8 tokens/s, Avg generation throughput: 42.1 toINFO 01-04 14:05:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:05:17 [loggers.py:111] Engine 000: Avg prompt throughput: 164.8 tokens/s, Avg generation throughput: 98.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, PrefixINFO 01-04 14:05:17 [lINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
99
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1INFO 01-04 INFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 227.0 tokens/s, Avg generation throughput: 98.1 tINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
100
+ INFO 01-04 14:05:30 [INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
101
+ INFO: 10INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:05:37 [loggers.py:111] Engine 000: AvINFO 01-04 14:05:40 [loggers.py:111] Engine 000: Avg prompt throughput: 322.2 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 2 reqs, Waiting: INFO 01-0INFO 01-04 14:05:47 [loggers.py:111] Engine 000: Avg INFO 01-04 14:05:50 [loggers.py:111] Engine 000: Avg prompt throughput: 193.6 tokens/s, Avg generation throughput: 101.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, INFO 01-04 14:05:57 [loggers.py:111] Engine 000: Avg prompt throughput: 176.7 tokens/s, Avg generation throughput: 80.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO: 10.43.30.3:0 - "POST /vINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
102
+ INFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 274.3 tokens/s, Avg generation throughput: 132.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:06:10 [loggers.py:111] INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
103
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
104
+ INFO 01-04 14:06:17 [loggers.py:111] Engine 000: Avg INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO 01-04 14:06:20 [loggers.py:111] Engine 000: Avg prompt throughput: 317.3 tokens/s, Avg generatINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
106
+ INFO 01-04 14:06:27 [loggers.py:111] Engine 000: Avg prompt throughput: 275.0 tokens/s, Avg generation throughput: 74.9 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
107
+ INFO 01-04 14:06:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO INFO 01-04 14:06:37 [loggers.py:111] Engine 000: Avg prompt throughput: 305.9 tokens/s, Avg generation throughput: 49.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usaINFO INFO 01-04 14:06:40 [loggers.py:1INFO 01-04 14:06:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO: 10.43.30.5:0 - "POST /vINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 20INFO INFO 01-04 14:06:57 [loggers.py:111] Engine 000: Avg prompt throughput: 461.4 tokens/s, Avg generation throughput: 113.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:07:00 [loggers.py:111] Engine 000: Avg prompt throughput: 143.9 tokens/s, Avg generatINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
108
+ INFO 0INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
109
+ INFO 01-04 14:07:07 [loggers.py:111] Engine 000: AvINFO 01-04 14:07:10 [loggers.py:111] Engine 000: Avg prompt throughput: 247.3 tokens/s, Avg generation throughput: 63.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GININFO 01-04 14:07:17 [loggers.py:111] Engine 000: AvINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
110
+ INFO 01-04 14:07:20 [loggers.py:111] Engine 000: Avg prompt throughput: 267.1 tokens/s, Avg generatINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
111
+ INFO 01-04 14:07:27 [loggers.py:111] Engine 000: Avg proINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
112
+ INFO 01-04 14:07:30 [loggers.py:111] Engine 000: Avg prompt throughput: 195.5 tokens/s, Avg genINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO 01-04 14:07:37 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:07:40 [loggers.py:111] Engine 000: Avg prompt throughput: 253.5 tokens/s, Avg generation throughput: 110.5 tokens/s, Running: 3 reqs, Waiting: 0 INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
114
+ INFO 01-04 14:07:50 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 116.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 1.9%
115
+ INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
116
+ INFO 01-04 14:08:00 [loggers.py:111] Engine 000: Avg prompt throughput: 624.6 tokens/s, Avg generation throughput: 109.3 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg promINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
117
+ INFO 01-04 14:08:10 [loggers.py:111] Engine 000: Avg prompt throughput: 317.2 tokens/s, Avg genINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
118
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
119
+ INFO 01-04 14:08:17 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:08:20 [loggers.py:111] Engine 000: Avg prompt throughput: 283.7 tokens/s, Avg generation throughput: 97.8 tokens/s, Running: 2 reqs, Waiting: 0 reINFO 01-04 14:08:27 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:08:30 [loggers.py:111] Engine 000: Avg prompt throughput: 362.9 tokens/s, Avg generation throughput: 97.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs,INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1INFO: INFO 01-04 14:08:37 [loggers.py:111] Engine 000: Avg prompt throughput: 220.4 tokens/s, Avg generation throughput: 32.8 tokens/INFO 01-04 14:08:40 [loggers.py:111] Engine 000: Avg prompt throughput: 271.7 tokens/s, AvgINFINFO 01-04 14:08:44 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:08:47 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:08:50 [loggers.py:111] Engine 000: Avg prompt throughput: 302.4 tokens/s, Avg generation throughput: 97.2 tokens/s, Running: 3 reqs, WaitingINFO 01-04 14:08:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.6 tokens/s, RunINFO 01-04 14:09:00 [loggers.py:111] Engine 000: Avg prompt throughput: 295.0 tokens/INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
120
+ INFO 01-04 14:09:07 [loggers.py:111] Engine 000: Avg prompt throughput: 445.3 tokens/s, Avg generation throughput: 93.3 tokens/s, RINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
121
+ INFO 01-04 14:09:10 [INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
122
+ INFO 01-04 14:09:17 [loggers.py:111] Engine 000: Avg prompt throughput: 270.6 tokens/s, Avg generation throughput: 73.4 tokens/s, RINFO 01-04 14:09:20 [loggers.py:111] Engine 000: Avg prompt throughput: 412.2 tokens/s,INFO 01-04 14:09:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cINFO 01-04 14:09:30 [loggers.py:111] Engine 000: Avg prompt throughput: 278.8 tokens/sINFO 01-04 14:09:37 [loggers.py:111] Engine 000: Avg prompt throughput: 119.1 tokens/s, Avg generation throughput: 77.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix caINFO 01-04 14:09:40INFO 01-04 14:09:47 [loggers.py:111] Engine 000: Avg prompt throughput: 678.1 tokens/s, Avg generation throughput: 133.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 10.2%
123
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
124
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
125
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
126
+ INFO 01-04 14:09:57 [loggers.py:111] Engine 000: Avg prompt throughput: 361.0 tokens/s, Avg generation throughput: 108.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 9.8%
127
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
128
+ INFO: INFO 01-04 14:10:04 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:10:07 [loggers.py:111] Engine 000: Avg prompt throughput: 365.5 tokens/s, Avg generation throughput: 52.3 tokens/s, RuINFO: 10.45.190.192:0 - INFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:10:10 INFO 01-04 14:10:17 [loggers.py:111] Engine 000: Avg prompt throughput: 292.4 tokens/s, Avg generation throughput: 60.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cINFO: 10.46.17.19INFO 01-04 14:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 12.2%
129
+ INFO: INFO 01-04 14:10:34 [loggers.py:111] Engine 000: Avg prompt throughput: 226.3 tokens/s, Avg generation throughput: 114.3 tokINFO 01-04 14:10:37 [loggers.py:111] Engine 000: Avg prompt throughput: 336.0 tokens/s, Avg genINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
130
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
131
+ INFO 01-04 14:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cINFO 01-04 14:10:50 [INFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughput: 306.9 tokens/s, Avg generation throughput: 11.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix INFO: 10.46.50.192INFO 01-04 14:11:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 11.5%
132
+ INFO: 10.46.50.192:0 - "POST /v1/compleINFO 01-04 14:11:10 [loINFO 01-04 14:11:17 [loggers.py:111] Engine 000: Avg prompt throughput: 384.9 tokens/s, Avg generation throughput: 44.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-INFO 01-04 14:11:20 [loggerINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKIINFO 01-04 14:11:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usageINFO 01-04 1INFO 01-04 14:11:30 [lINFO 01-04 14:11:44 [loggers.py:111] Engine 000: Avg prompt throughput: 327.4 tokens/s, Avg generation throughput: 8.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 8.8%
133
+ INFO 01-04 14:11:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, PrefINFO 01-04 14:12:00 [loggers.py:111] Engine 000: Avg prompt throughput: 442.4 tokens/s, AvgINFO 01-04 14:12:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.4 tokens/s, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
134
+ INFO 01-04 14:12:10 [INFO 01-04 14:12:14 [loggers.py:111] Engine 000: Avg prompt throughput: 271.8 tokens/s, Avg generation throughput: 22.1 tokINFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prompt throughput: 277.1 tokens/s, Avg generation throughput: 45.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 11.5%
135
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
136
+ INFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt throughput: 273.1 tokens/s, Avg generation throughput: 2.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 11.3%
137
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
138
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
139
+ INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 180.5 tokens/s, Avg generation throughput: 50.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cacheINFO: 10.45.190.192:0 - "POST /v1/complINFO 01-04 14:12:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.1%
140
+ INFO 01-04 14:13:16 [loggers.py:111] Engine 000: Avg prompt throughput: 340.2 tokens/s, Avg generation throughput: 33.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 10.8%
141
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
142
+ INFO 01-04 14:13:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 42.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.8%
143
+ INFO 01-04 14:13:36 [loggers.py:111] Engine 000: Avg prompt throughput: 713.2 tokens/s, Avg generation throughput: 45.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 10.2%
144
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
145
+ INFO 01-04 14:13:46 [loggers.py:111] Engine 000: Avg prompt throughput: 282.3 tokens/s, Avg generation throughput: 99.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 10.0%
146
+ INFO 01-04 14:13:56 [loggers.py:111] Engine 000: Avg prompt throughput: 382.0 tokens/s, Avg generation throughput: 106.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 11.8%
147
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
148
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
149
+ INFO 01-04 14:14:06 [loggers.py:111] Engine 000: Avg prompt throughput: 532.0 tokens/s, Avg generation throughput: 65.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 11.4%
150
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
151
+ INFO 01-04 14:14:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 51.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 11.4%
152
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO 01-04 14:14:26 [loggers.py:111] Engine 000: Avg prompt throughput: 743.0 tokens/s, Avg generation throughput: 59.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 10.8%
154
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
155
+ INFO 01-04 14:14:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 59.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 10.8%
156
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
157
+ INFO 01-04 14:14:46 [loggers.py:111] Engine 000: Avg prompt throughput: 614.5 tokens/s, Avg generation throughput: 49.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 10.4%
158
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
159
+ INFO 01-04 14:14:56 [loggers.py:111] Engine 000: Avg prompt throughput: 413.1 tokens/s, Avg generation throughput: 38.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 10.1%
160
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
161
+ INFO 01-04 14:15:06 [loggers.py:111] Engine 000: Avg prompt throughput: 872.1 tokens/s, Avg generation throughput: 74.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 9.9%
162
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
163
+ INFO 01-04 14:15:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 50.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 9.9%
164
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
165
+ INFO 01-04 14:15:26 [loggers.py:111] Engine 000: Avg prompt throughput: 535.4 tokens/s, Avg generation throughput: 55.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 9.6%
166
+ INFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 417.3 tokens/s, Avg generation throughput: 68.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 9.4%
167
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
168
+ INFO 01-04 14:15:46 [loggers.py:111] Engine 000: Avg prompt throughput: 550.4 tokens/s, Avg generation throughput: 79.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 9.1%
169
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
170
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ INFO 01-04 14:15:56 [loggers.py:111] Engine 000: Avg prompt throughput: 605.5 tokens/s, Avg generation throughput: 68.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 11.6%
172
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
173
+ INFO 01-04 14:16:06 [loggers.py:111] Engine 000: Avg prompt throughput: 303.1 tokens/s, Avg generation throughput: 26.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 11.4%
174
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
175
+ INFO 01-04 14:16:16 [loggers.py:111] Engine 000: Avg prompt throughput: 463.2 tokens/s, Avg generation throughput: 53.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 11.2%
176
+ INFO 01-04 14:16:26 [loggers.py:111] Engine 000: Avg prompt throughput: 362.6 tokens/s, Avg generation throughput: 59.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 11.0%
177
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
178
+ INFO 01-04 14:16:36 [loggers.py:111] Engine 000: Avg prompt throughput: 604.3 tokens/s, Avg generation throughput: 64.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 11.8%
179
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
180
+ INFO 01-04 14:16:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 84.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 11.8%
181
+ INFO 01-04 14:16:56 [loggers.py:111] Engine 000: Avg prompt throughput: 576.9 tokens/s, Avg generation throughput: 82.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 11.4%
182
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
184
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
185
+ INFO 01-04 14:17:06 [loggers.py:111] Engine 000: Avg prompt throughput: 477.4 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.1%
186
+ INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 929.6 tokens/s, Avg generation throughput: 57.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 14.3%
187
+ INFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 14.3%
188
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
189
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
190
+ INFO 01-04 14:17:36 [loggers.py:111] Engine 000: Avg prompt throughput: 633.9 tokens/s, Avg generation throughput: 68.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.5%
191
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
192
+ INFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 708.9 tokens/s, Avg generation throughput: 40.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 16.0%
193
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
194
+ INFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 520.8 tokens/s, Avg generation throughput: 29.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.7%
195
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
196
+ INFO 01-04 14:18:06 [loggers.py:111] Engine 000: Avg prompt throughput: 496.4 tokens/s, Avg generation throughput: 33.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.4%
197
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
198
+ INFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 407.0 tokens/s, Avg generation throughput: 68.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 16.6%
199
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
200
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
201
+ INFO 01-04 14:18:26 [loggers.py:111] Engine 000: Avg prompt throughput: 524.0 tokens/s, Avg generation throughput: 49.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.3%
202
+ INFO 01-04 14:18:36 [loggers.py:111] Engine 000: Avg prompt throughput: 412.6 tokens/s, Avg generation throughput: 16.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 16.5%
203
+ INFO 01-04 14:18:46 [loggers.py:111] Engine 000: Avg prompt throughput: 741.7 tokens/s, Avg generation throughput: 74.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 16.0%
204
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
205
+ INFO 01-04 14:18:56 [loggers.py:111] Engine 000: Avg prompt throughput: 602.2 tokens/s, Avg generation throughput: 104.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 17.2%
206
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
207
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
208
+ INFO 01-04 14:19:06 [loggers.py:111] Engine 000: Avg prompt throughput: 637.4 tokens/s, Avg generation throughput: 51.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.6%
209
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
210
+ INFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 353.2 tokens/s, Avg generation throughput: 38.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 17.4%
211
+ INFO 01-04 14:19:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 17.4%
212
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
213
+ INFO 01-04 14:19:36 [loggers.py:111] Engine 000: Avg prompt throughput: 511.9 tokens/s, Avg generation throughput: 57.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 17.1%
214
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
215
+ INFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1315.0 tokens/s, Avg generation throughput: 56.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 17.8%
216
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
217
+ INFO 01-04 14:19:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 81.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.8%
218
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
219
+ INFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput: 721.5 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.4%
220
+ INFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 864.7 tokens/s, Avg generation throughput: 75.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cache hit rate: 16.9%
221
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
222
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
223
+ INFO 01-04 14:20:26 [loggers.py:111] Engine 000: Avg prompt throughput: 616.5 tokens/s, Avg generation throughput: 80.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.6%
224
+ INFO 01-04 14:20:36 [loggers.py:111] Engine 000: Avg prompt throughput: 670.8 tokens/s, Avg generation throughput: 65.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 16.3%
225
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
226
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
227
+ INFO 01-04 14:20:46 [loggers.py:111] Engine 000: Avg prompt throughput: 454.9 tokens/s, Avg generation throughput: 95.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.9%
228
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
229
+ INFO 01-04 14:20:56 [loggers.py:111] Engine 000: Avg prompt throughput: 554.7 tokens/s, Avg generation throughput: 67.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.6%
230
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
231
+ INFO 01-04 14:21:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.6%
232
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
233
+ INFO 01-04 14:21:16 [loggers.py:111] Engine 000: Avg prompt throughput: 704.4 tokens/s, Avg generation throughput: 29.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
234
+ INFO 01-04 14:21:26 [loggers.py:111] Engine 000: Avg prompt throughput: 319.7 tokens/s, Avg generation throughput: 30.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 18.5%
235
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
236
+ INFO 01-04 14:21:36 [loggers.py:111] Engine 000: Avg prompt throughput: 397.9 tokens/s, Avg generation throughput: 57.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 19.3%
237
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
238
+ INFO 01-04 14:21:46 [loggers.py:111] Engine 000: Avg prompt throughput: 692.2 tokens/s, Avg generation throughput: 44.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 18.9%
239
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
240
+ INFO 01-04 14:21:56 [loggers.py:111] Engine 000: Avg prompt throughput: 436.7 tokens/s, Avg generation throughput: 53.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 18.8%
241
+ INFO 01-04 14:22:06 [loggers.py:111] Engine 000: Avg prompt throughput: 502.7 tokens/s, Avg generation throughput: 65.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 18.6%
242
+ INFO 01-04 14:22:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 18.6%
243
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
244
+ INFO 01-04 14:22:26 [loggers.py:111] Engine 000: Avg prompt throughput: 768.5 tokens/s, Avg generation throughput: 102.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 18.2%
245
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
246
+ INFO 01-04 14:22:36 [loggers.py:111] Engine 000: Avg prompt throughput: 481.2 tokens/s, Avg generation throughput: 56.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 18.0%
247
+ INFO 01-04 14:22:46 [loggers.py:111] Engine 000: Avg prompt throughput: 699.0 tokens/s, Avg generation throughput: 91.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.4%, Prefix cache hit rate: 18.9%
248
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
249
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
250
+ INFO 01-04 14:22:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 18.9%
251
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
252
+ INFO 01-04 14:23:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 27.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
253
+ INFO 01-04 14:23:16 [loggers.py:111] Engine 000: Avg prompt throughput: 585.8 tokens/s, Avg generation throughput: 29.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 18.6%
254
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
255
+ INFO 01-04 14:23:26 [loggers.py:111] Engine 000: Avg prompt throughput: 399.2 tokens/s, Avg generation throughput: 29.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 18.4%
256
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
257
+ INFO 01-04 14:23:36 [loggers.py:111] Engine 000: Avg prompt throughput: 689.4 tokens/s, Avg generation throughput: 44.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.1%
258
+ INFO 01-04 14:23:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 18.1%
259
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
260
+ INFO 01-04 14:23:56 [loggers.py:111] Engine 000: Avg prompt throughput: 946.6 tokens/s, Avg generation throughput: 70.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 17.7%
261
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
262
+ INFO 01-04 14:24:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 17.7%
263
+ INFO 01-04 14:24:16 [loggers.py:111] Engine 000: Avg prompt throughput: 344.0 tokens/s, Avg generation throughput: 50.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 17.6%
264
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
265
+ INFO 01-04 14:24:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 62.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 17.6%
266
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
267
+ INFO 01-04 14:24:36 [loggers.py:111] Engine 000: Avg prompt throughput: 782.9 tokens/s, Avg generation throughput: 40.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 18.5%
268
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
269
+ INFO 01-04 14:24:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.5%
270
+ INFO 01-04 14:24:56 [loggers.py:111] Engine 000: Avg prompt throughput: 661.2 tokens/s, Avg generation throughput: 35.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.2%
271
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
272
+ INFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
273
+ INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1472.3 tokens/s, Avg generation throughput: 43.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 17.6%
274
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
275
+ INFO 01-04 14:25:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 17.6%
276
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
277
+ INFO 01-04 14:25:36 [loggers.py:111] Engine 000: Avg prompt throughput: 932.1 tokens/s, Avg generation throughput: 54.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 17.3%
278
+ INFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 567.9 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 17.0%
279
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
280
+ INFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.0%
281
+ INFO 01-04 14:26:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1074.1 tokens/s, Avg generation throughput: 52.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.9%, Prefix cache hit rate: 18.2%
282
+ INFO 01-04 14:26:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.2%, Prefix cache hit rate: 18.2%
283
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
284
+ INFO 01-04 14:26:26 [loggers.py:111] Engine 000: Avg prompt throughput: 827.6 tokens/s, Avg generation throughput: 62.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 17.9%
285
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
286
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
287
+ INFO 01-04 14:26:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.9%
288
+ INFO 01-04 14:26:46 [loggers.py:111] Engine 000: Avg prompt throughput: 864.0 tokens/s, Avg generation throughput: 23.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 17.6%
289
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
290
+ INFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 42.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.6%
291
+ INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 763.8 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 17.3%
292
+ INFO 01-04 14:27:16 [loggers.py:111] Engine 000: Avg prompt throughput: 522.4 tokens/s, Avg generation throughput: 49.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 17.9%
293
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
294
+ INFO 01-04 14:27:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 64.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 17.9%
295
+ INFO 01-04 14:27:36 [loggers.py:111] Engine 000: Avg prompt throughput: 788.4 tokens/s, Avg generation throughput: 76.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 17.6%
296
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
297
+ INFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throughput: 528.2 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 17.5%
298
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
299
+ INFO 01-04 14:27:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 17.5%
300
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
301
+ INFO 01-04 14:28:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.5%
302
+ INFO 01-04 14:28:16 [loggers.py:111] Engine 000: Avg prompt throughput: 840.4 tokens/s, Avg generation throughput: 40.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 18.6%
303
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
304
+ INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1025.3 tokens/s, Avg generation throughput: 30.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 19.9%
305
+ INFO 01-04 14:28:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 19.9%
306
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
307
+ INFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 11.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.9%
308
+ INFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throughput: 907.3 tokens/s, Avg generation throughput: 8.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 19.6%
309
+ INFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 19.6%
310
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
311
+ INFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.6%
312
+ INFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.6%
313
+ INFO 01-04 14:29:36 [loggers.py:111] Engine 000: Avg prompt throughput: 721.0 tokens/s, Avg generation throughput: 18.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 19.4%
314
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
315
+ INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
316
+ INFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
317
+ INFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 608.1 tokens/s, Avg generation throughput: 37.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 19.1%
318
+ INFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 19.1%
319
+ INFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 19.1%
320
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
321
+ INFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.1%
322
+ INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 773.2 tokens/s, Avg generation throughput: 42.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.9%
323
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
324
+ INFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
325
+ INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1030.2 tokens/s, Avg generation throughput: 14.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 18.6%
326
+ INFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 18.6%
327
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
328
+ INFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
329
+ INFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
330
+ INFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 591.5 tokens/s, Avg generation throughput: 30.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 18.4%
331
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
332
+ INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
333
+ INFO 01-04 14:32:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
334
+ INFO 01-04 14:32:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1263.6 tokens/s, Avg generation throughput: 29.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 19.3%
335
+ INFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 19.3%
336
+ INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 19.3%
337
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
338
+ INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 838.5 tokens/s, Avg generation throughput: 56.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 19.0%
339
+ INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 19.0%
340
+ INFO: 10.43INFO 01-04 14:33:48 [loggers.py:111] Engine 000: Avg prompt throughput: 558.6 tokens/s, Avg generation throughput: 22.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 15.1%
341
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
342
+ INFO 01-04 14:33:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cacINFO 01-04 14:34:16 [loggers.py:111] Engine 000: Avg prompt throughput: 757.7 tokens/s, Avg generation throughput: 15.4 tokens/s,INFO 01-04 14:34:27 [loggers.py:111] Engine 000: Avg prompt throughput: 697.8 tokens/s, Avg generation throughput: 19.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.5%
343
+ INFO 01-04 14:34:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 16.5%
344
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
345
+ INFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
346
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
347
+ INFO 01-04 14:34:57 [lINFO 01-04 14:35:26 [loggers.py:111] Engine 000: Avg prompt throughput: 870.9 tokens/s, Avg generation throughput: 6.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 18.5%
348
+ INFO 01-04 14:35:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix INFO 01-04 14:37:07 [loggers.py:111] Engine 000: Avg prompt throughput: 453.7 tokens/s, Avg generation throughput: 7.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 16.2%
349
+ INFO 01-04 14:37:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 16.2%
350
+ INFO 01-04 14:37:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 16.2%
351
+ INFO 01-04 14:37:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.2%
352
+ INFO 01-04 14:37:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.2%
353
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
354
+ INFO 01-04 14:37:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.2%
355
+ INFO 01-04 14:38:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.2%
356
+ INFO 01-04 14:38:47 [loggers.py:111] Engine 000: Avg prompt throughput: 632.5 tokens/s, Avg generation throughput: 24.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.0%
357
+ INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.0%
358
+ INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 16.0%
359
+ INFO 01-04 14:39:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 16.0%
360
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
361
+ INFO 01-04 14:39:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.0INFO 01-04 14:39:44 [loggers.py:111] Engine 000: Avg prompt throughput: 657.7 tokens/s, Avg generation throughput: 28.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 10.8%INFO 01-04 14:40:17 [loggers.py:111] Engine 000: Avg prompt throughput: 901.7 tokens/s, Avg generation throughput: 25.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 15.8%
362
+ INFO 01-04 14:40:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:41:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1032.5 tokens/s, Avg generation throughput: 38.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 18.4%
363
+ INFO 01-04 14:41:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 18.4%
364
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
365
+ INFO 01-04 14:41:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
366
+ INFO 01-04 14:41:56 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:41:57 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:42:44 [loggers.py:111] Engine 000: Avg prompt throughput: 657.9 tokens/s, AINFO 01-04 14:44:56 [loggers.py:111] Engine 000: Avg prompt throughput: 719.8 tokens/s, Avg generation throughput: 41.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 18.2%
367
+ INFO 01-04 14:45:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.2%
368
+ INFO 01-04 14:45:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, PrefiINFO 01-04 14:46:44 [loggers.py:111] Engine 000: Avg prompt throughput: 1221.5 tokens/s, Avg generation throughput: 36.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 11.1%
369
+ INFO 01-04 14:46:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 11.1%
370
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
371
+ INFO 01-04 14:47:04 INFO 01-04 14:48:56 [loggers.py:111] Engine 000: Avg prompt throughput: 377.9 tokens/s, Avg generation throughput: 35.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 18.1%
372
+ INFO 01-04 14:49:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, PrefixINFO 01-04 14:51:44 [loggers.py:111] Engine 000: Avg prompt throughput: 892.0 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 11.0%
373
+ INFO 01-04 14:51:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 11.0%
374
+ INFO 01-04 14:52:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 11.0%
375
+ INFO 01-04 14:52:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 15:01:06 [loggers.py:111] Engine 000: Avg prompt throughput: 476.6 tokens/s, Avg generation throughput: 9.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 18.5%
376
+ INFO 01-04 15:01:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, RunINFO 01-04 15:02:05 [loggers.py:111] Engine 000: Avg prompt throughput: 481.1 tokens/s, Avg generation throughput: 22.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 17.INFO 01-04 15:09:17 [loggers.py:111] Engine 000: Avg prompt throughput: 542.4 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 15.6%
377
+ INFO 01-04 15:09:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.6%
378
+ INFO 01-04 15:09:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 15.6%
379
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
380
+ INFO 01-04 15:09:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 29.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.6%
381
+ INFO 01-04 15:09:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.6%
382
+ 5:04:28 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.2%
383
+ INFO 01-04 15:04:38 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 18.2%
384
+ INFO 01-04 15:04:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 18.2%
385
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
386
+ INFO 01-04 15:04:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 27.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
387
+ INFO 01-04 15:05:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
hf_ip/vllm_gpu2.log ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:30 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:32 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:32 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8003, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:39 [config.py:717] This model supports multiple tasks: {'reward', 'score', 'classify', 'generate', 'embed'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:40 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:13:44 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:13:47 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:13:47 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7fbf15272710>
13
+ INFO 01-04 13:13:49 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:13:49 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:13:49 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:13:49 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 111.25 seconds
35
+ INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 111.510422 seconds
36
+ INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
37
+ INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.56 s
38
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
39
+ INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.12 s
40
+ INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.68 s in total
41
+ INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
42
+ INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
43
+ INFO 01-04 13:18:33 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
44
+ INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.10 seconds
45
+ INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
46
+ WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
47
+ INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
48
+ INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
49
+ INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8003
50
+ INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
51
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: GET, HEAD
52
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: GET, HEAD
53
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: GET, HEAD
54
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: GET, HEAD
55
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
56
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
57
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: POST, GET
58
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
59
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
60
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
61
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
62
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
63
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
64
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
65
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
66
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
67
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
68
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
69
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
70
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
71
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
72
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
73
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
74
+ INFO: Started server process [1316160]
75
+ INFO: Waiting for application startup.
76
+ INFO: Application startup compINFO 0INFO 01-04 13:59:58 [loggers.py:111] Engine 000: Avg prompt throughput: 116.3 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rateIINFO 01-04 14:00:07 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:00:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.6 tokens/s, Running: 1 reqs, IINFOININFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 2IINFINFO 01-04 14:00:18 [loggers.py:111] Engine 000: Avg prompt throughput: 61.2 tokens/s, Avg generation throughput: 80.6 tokens/s, Running: 2 reqs,INFO INFO 01-04 14:00:23 [loggers.py:111] Engine 000: Avg prompt thINFOINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1"INFO 01INFO INFO 01-04 14:00:28 [loggers.py:111] Engine 000: Avg prompt throughput: 86.4 tokens/INFO: 10.43.30.5:0 - "GET /v1/models HTTP/1.1" 200 OK
77
+ INFO 01-04 14:00:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:00:37 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:00:38 [loggers.py:111] Engine 000: Avg prompt throughput: 82.1 tokens/s, Avg generation throughput: 81.6 tokens/s, Running: 2 reqINFINFO 01-04 14:00:46 [loggers.py:111] Engine 000: Avg prompt througINFO INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
78
+ INFO 01-04 14:00:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokeINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/INFO 01-04 IINFO 01-04 14:00:56 [loggers.py:111] Engine 000: Avg prompt throughput: 76.3 tokens/s, Avg generation throughput: 86.0 tokens/s, RunnINFO 01-04 14:00:58 [loggers.py:111] Engine 000: Avg prompt throughput: INFO: INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
79
+ INFO 01-04 14:01:07 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:01:08 [loggers.py:111] Engine 000: Avg prompt throughput: 61.2 tokens/s, Avg generation throughput: 51.3 tokens/s, Running: INFO: INFOINFO 01-04 14:01:16 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:01:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 60.6 tokens/s, Running:IINFO: INFO: INFO 01-04 14:01:26 [loggers.py:111] Engine 000: AINFO 01-0INFO 0INFO 01-04 14:01:28 [loggers.py:111] Engine 000: Avg prompt throughput: 62.5 tokens/s, Avg generation throughput: 83.0 tokens/s, Running: 2 reqs, WaitingINFO: 10.46.50.192:0 - "POST /v1/completions INFO 01-04 14:01:36 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 INFO 01-04 14:01:38 [loggers.py:111] Engine 000: Avg prompt throughput: 96.0 tokens/s, Avg generation throughput: 81.8 tokens/s, RunniINFO: 10.45.19INFO 01-04 14:01:46 [loggers.py:111] Engine 000: INFO 01-04 1INFO INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
80
+ INFO 01-04 14:01:48 [loggers.py:111] Engine 000: Avg prompt throughput: 78.3 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:01:56 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt throughput: 86.6 toINFO 01-04 14:01:58 [loggers.py:111] Engine 000: Avg prompt throughput: 95.2 tokens/s, Avg generation throughput: 96.9 tokens/s, RINFO 01-INFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt throughput: 73.5 tokINFO 01-04 14:02:08 [loggers.py:111] Engine 000: Avg prompt throughput: 76.5 tokens/s, Avg generation throughput: 111.0 tokens/sINFO 01-04 14:INFO 01-04 14:02:16 [loggers.py:111] Engine 000: Avg prompt throughput: 86.9 tokens/s, Avg generation throughput: 91.5 tokens/sINFO 01-04 14:02INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 20INFO: 10.INFO: 10.46.50.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1"INFOINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput: 85.7 tokens/s, Avg generation throughput: 82.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit ratINFO: 10.INFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg prompt throughput: 66.0 tokens/s, Avg generation throughput: 89.0 tokens/INFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt throughput: 102.7 tokens/s,INFOINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
81
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1INFO 0INFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughput: 145.2 tokens/s, Avg generation throughput: 62.3 tokens/s, Running: 2 reqs, WaitinINFO 01-04 14:02:48 [loggers.py:111] Engine 000: Avg prompININFO: INFO 01-04 14:02:56 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt throughput: 104.7 tokens/s, Avg generation throughput: 98.4 tokens/s, Running: 2 reqs, WaINFOINFO 01-04 14:03:06 [loggers.py:111] Engine 000: Avg prompt throughput: 165.1 tokens/s, Avg generation throughput: 75.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, PrefiINFO 01-04 14:03:07 [loINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
82
+ INFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg prompt throughput: 121.6 tokens/s, Avg generation throughput: 106.5 tokens/sINFO 01-04 14:03:17 [loggers.py:111] Engine 000: Avg prompt throughput: 117.8 tokens/sINFO 01-04 14:03:18 [loggers.py:111] Engine 000: Avg prompINFO: 10INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
83
+ INFO 01-04 14:03:26 [loggers.py:111] Engine 000: Avg prompt INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
84
+ INFO 01-04 14:03:27 [INFO 01-04 14:03:28 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:03:36 [loggers.py:111] Engine 000: Avg prompt throughput: 123.7 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, PreINFO 01-04 14:03:37 [loININFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:03:46 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:03:46 [loggers.py:111] Engine 000: Avg prompt throughput: 149.4 tokens/s, Avg generation throughput: 75.5 tokens/s, Running: 2 reqs, WaINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO 01-04 1INFO 01-04 14:03:56 [loggers.py:111] Engine 000: Avg prompt throughput: 283.3 tokens/s, Avg generation throughput: 44.5 tokens/s, Running: 2 reqs, WaiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
86
+ INFO 01-0INFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:04:06 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:04:06 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:04:07 [loggers.py:111] Engine 000: Avg prompt throughput: 114.7 tINFO 01-INFO 01-04 14:04:16 [loggers.py:111] Engine 000: Avg prompt throughput: 166.0 tokens/s, Avg generation throughput: 77.1 tokens/s, RINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
87
+ INFO 01-04 14:04INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
88
+ INFO: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:04:26 [loggers.py:111] Engine 000: Avg prompt throughput: 180.8 tokens/s, Avg generation throughput: 71.2 tokens/s,INFO 01-04 14:04:27 [logINFO 01-04 14:04:28 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:04:36 [loggers.py:111] Engine 000: Avg prompt throughput: 178.4 tokens/s, Avg generation throughput: 74.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 3.5%
89
+ IINFO 01-04 14:04:38 [loggers.py:111] Engine 000: Avg promptINFO: INFOINFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:04:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.2 tokeINFO 01-04 14:04:47 [loggers.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFOINFO 01-04 14:04:56 [loggers.py:111] Engine 000: Avg prompt throughput: 95.0 tokens/s, Avg generation throughput: 24.4 tokININFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg genINFO 01-04 14:05:06 [loggers.py:111] Engine 000: Avg prompt throughput: 119.1 tokens/s, Avg generation throughput: 78.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%,INFO 01-04 14:05:06 [loggers.INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
90
+ INFO 01-04 14:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 217.8 tokens/s, Avg generation throughput: 115.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 3.0%
91
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
92
+ INFO: INFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
93
+ INFO 01-04 14:05:26 [loggers.py:111] Engine 000: Avg prompt throughput: 211.6 tokens/s, Avg genINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:05:36 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:05:36 [loggers.py:111] Engine 000: Avg prompt throughput: 153.9 tokens/s, Avg generation throughput: 103.7 tokens/s, Running: 3 reqs, Waiting: 0 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
95
+ INFO 01-04 14:05:46 [loggers.py:111] Engine 000: Avg prompt throughput: 191.4 tokens/s, Avg generation throughput: 59.1 tokens/s, Running: 1 reqs, WaitingINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
96
+ INFOINFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 254.5 tokens/s, Avg generation throughput: 18.7 INFO 01-04 14:05:56 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:05:57 [loggers.py:11INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
97
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
98
+ INFO 01-04 14:06:06 [loggers.py:111] Engine 000: AvINFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 230.6 tokens/s, Avg generationINFO 01-04 14:06:08 [loggers.py:111] Engine 000: Avg promptINFO: INFO 01-04 14:06:16 [loggers.py:111] Engine 000: Avg prompt throughput: 244.9 tokens/s, Avg generation throughput: 22.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO 01-04 14:06:17 [loggers.py:1INFO 01-04 14:06:26 [loggers.py:111] Engine 000: Avg prompt throughput: 158.9 tokens/s, Avg generation throughput: 74.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2INFO: 10.45.190.192:0 - "POSTINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
99
+ INFO 01-04 14:06:36 [loggers.py:111] Engine 000: Avg prompt throughput: 223.0 tokens/s, Avg generation throughput: 120.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO: 10.46.17.192:0 - "POST /vINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
100
+ INFO 01-04 14:06:46 [loggers.py:111] Engine 000: Avg prompt throughput: 254.9 tokens/s, Avg generation throughput: 128.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 10.4%
101
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
102
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO 01-04 1INFO 01-04 14:06:56 [loggers.py:111] Engine 000: Avg prompt throughput: 436.6 tokens/s, Avg generation throughput: 43.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:06:57 [loggers.py:111] EINFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:07:06 [loggers.py:111] EnginINFO 01-04 14:07:06 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:07:07 [loggers.py:111] EINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
103
+ INFININFO 01-04 14:07:16 [loggers.py:111] Engine 000: Avg prompt throughput: 205.2 tokens/s, Avg generation throughput: 42.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:07:17 [loggers.py:111] EnINFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:07:26 [loggers.py:111] Engine 000: Avg prompt throughput: 238.8 tokens/s, Avg generation throuINFO 01-04 14:07:27 [loggers.py:111] Engine 000: Avg prompt throughput: 148.8 tokens/s, Avg generation thrINFO INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
104
+ INFO 01-04 14:07:36 [loggers.py:111] Engine 000: Avg prompt throughput: 234.1 tokens/s, Avg generation IINFO 01-04 14:07:37 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200INFOINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO 0INFO 01-04 14:07:46 [loggers.py:111] Engine 000: Avg prompt throughput: 269.4 tokens/s, Avg generation throughput: 52.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:07:47 [loggers.py:111] EngiINFO 0INFO: INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
106
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
107
+ INFO 01-04 14:07:56 [loggers.py:111]INFO 01-04 14:07:56 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:07:57 [loggers.py:111] EnINFO 01-04 14:07:58 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 1INFO 01-04 14:08:06 [loggers.py:111] Engine 000: Avg prompt throughput: 286.0 tokens/s, Avg generaINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg prompt throughput: 194.5 tokens/s, Avg generation throuINFO: 10.INFO: 10.46.50.192:0 - "POST /v1/completINFO: INFO 01-04 14INFO 01-04 14:08:16 [loggers.py:111] Engine 000: Avg prompt throughput: 291.9 tokens/s, Avg generINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
108
+ INFO 01-04 14:08:17 [loggers.py:111] Engine INFO: 10.4INFO: 10.45.190.192:0 - "POST /v1/completions HTINFO 01-04 14:0INFO 01-04 14:08:26 [loggers.py:111] Engine 000: Avg prompt throughput: 276.7 tokens/s, Avg generation throughput: 65.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 232.9 tokenINFO 01-04 14:08:36 [loggers.py:111] Engine 000: Avg prompt throughput: 389.1 tokens/s, Avg generation throughput: 25.7 tokens/s, RunniINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 0INFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 343.1 tokeINFO 01-04 14:08:46 [loggers.py:111] Engine 000: Avg prompt throughput: 205.5 tokens/s, Avg generation throughput: 45.2 tokens/s, RuINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
109
+ INFO: 10.46.17.1INFO 01-04 14:08:56 [loggers.py:111] Engine 000: Avg prompt throughput: 254.5 tokens/s, Avg generation throughput: 76.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 17.9%
110
+ INFO: 10.45.190.192:0 - "POST /v1/completiINFO: 10.43.30.4:INFO 01-04 14:09:06 [loggers.py:111] Engine 000: Avg prompt throughput: 307.6 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.8%
111
+ INFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg prompt throughput: 352.6 tokens/s, Avg generation throughput: 91.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 17.1%
112
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO 01-04 14:09:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 135.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 17.1%
114
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
115
+ INFO: 10.46.50.192:0 - "POST /v1/compINFO 01-04 14:09:33 [loggINFO 01-04 14:09:36 [loggers.py:111] Engine 000: Avg prompt throughput: 418.5 tokens/s, Avg generation throughput: 57.7 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:09:36 [loggers.py:111]INFO: 10.46.17.192:0 -INFO: 10.43.30.4:0 - "POST /v1/complINFO 01-04 14:09:43 [logINFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 281.4 tokens/s, Avg generation throughput: 43.8 tokens/s, Running: 1 reqs, Waiting: 0 rINFO 01-04 14:09:46 [loggers.py:111INFO: 10.43.30.4:0 - INFO 01-04 14:09:56 [loggers.py:111] Engine 000: Avg prompt throughput: 337.7 tokens/s, Avg generation throughput: 81.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 19.4%
116
+ INFO: 10.43.30.4:0 - "POST /v1/comINFO 01-04 14:10:03 [loggeINFO 01-04 14:10:06 [loggers.py:111] Engine 000: Avg prompt throughput: 369.8 tokens/s, Avg generation throughput: 96.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 18.7%
117
+ INFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:10:13 [loggerINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
118
+ INFO 01-04 14:10:16 [loggers.py:111] Engine 000: Avg prompt throughput: 674.9 tokens/s, Avg generation throughput: 99.4 tokenINFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 264.6 tokens/s, Avg generation throughput: 127.9 tokens/s, Running: 3 reqs, Waiting: 0INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 INFINFO 01-04 14:10:26 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 205.2 tokens/s, Avg generation throughput: 116.9 tokens/s, Running: 3 reqs, WaitiINFO 01-04 14:10:36 [loggers.py:111] Engine 000: Avg prompt througINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
119
+ INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 327.7 tokens/sINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
120
+ INFO 01-04 14:10:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 41.8 tokens/s, RunniINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
121
+ INFO 01-04 14:10:53 INFO 01-04 14:10:56 [loggers.py:111] Engine 000: Avg prompt throughput: 339.0 tokens/s, Avg generation throughput: 3.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
122
+ INFO 01-04 14:11:03 [lINFO 01-04 14:11:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geINFO 01-04 14:11:16 [loggers.py:111] Engine 000: Avg prompt throughput: 479.2 tokens/s, Avg generation throughput: 12.6 tokenINFO 01-04 14:11:16 [loggers.py:111] Engine 000: Avg prompt throughput: 359.1 tokens/s, Avg geINFO 01-04 14:11:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, PrefINFO 01-04 14:11:26 [loggINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
123
+ INFO 01-04 14:11:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:11:43 [loggers.py:111] Engine 000: Avg prompt throughput: 357.3 tokens/s, Avg generation throughput: 3INFO 01-04 14:11:46 [loggers.py:111] Engine 000: Avg prompt throughput: 352.1 tokens/s, Avg generatioINFO 01-04 14:11:53 [loggeINFO 01-04 14:11:56 [loggers.py:111] Engine 000: Avg prompt throughput: 277.9 tokens/s, Avg generation throughput: 14.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, INFO: 10.46.50.192:0 - "PINFO: 10.46.50.192:0 - "POST /v1/INFO 01-04 14:12:03 [loggers.INFO 01-04 14:12:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%,INFO 01-04 14:12:13 [loggers.pINFO 01-04 14:12:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
124
+ INFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:12:27 [loggers.py:111] Engine 000: Avg prompt throughput: 313.4 tokens/s, Avg generation INFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.0 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 563.3 tokens/s, Avg generation throughput:INFO 01-04 1INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 284.4 tokens/s, Avg gINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
125
+ INFO 01-04 14:12:56 [loggers.py:111] Engine 000: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
126
+ INFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:13:06 [loggers.py:111] Engine 000: Avg prompt throughput: 242.2 tokens/s, Avg generation throughput:INFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO 01-04 14:13:16 [loggers.py:111] Engine 000: Avg prompt throughput: 273.4 tokens/s, Avg generation throughINFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 433.9 tokens/s, Avg generation througINFO 01-04 14:13:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:13:27 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
127
+ INFO 01-04 14:13:36 [loggers.py:111] Engine 000: Avg prompt throughput: 595.4 tokens/s, Avg generation thrINFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 299.7 tokens/s, Avg generation throughputINFO 01-04 14:13:46 [loggers.py:111] Engine 000: Avg prompt throughput: 281.5 tokens/s, Avg generation throughput: 154.6 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 10.1%
128
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
129
+ INFO: 10.46.17.192:0 - "POST /v1/coINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
130
+ INFO: 10.46.17.192:0 INFO 01-04 14:13:56 [loggers.py:111] EngiINFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 298.1 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 5.1%
131
+ INFO: 10.43.30.3:0 - INFO 01-04 14:14:06 [loggers.py:111] EnINFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 379.9 tokens/s, Avg generation throughput: 12.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO: 10.46.17.192:0 - "POST /v1/coINFO: 10.43.30.4:0 - "PINFO 01-04 14:14:16 [loggers.py:111] INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 369.1 tokens/s, Avg generation throughput: 51.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:14:26 [loggers.py:111] EnINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
132
+ INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 395.9 tokens/s, Avg generation throughput: 6INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
133
+ INFO: 10.45.190.192:0 - "POST /v1/INFO: 10.46.17.192:0 - "PINFO 01-04 14:14:36 [loggers.py:111] INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 158.3 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usagINFO: 10.46.17.192:0 - "POST /v1/INFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 600.4 tokens/s, Avg generation throughput: 76.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 5.2%
134
+ INFO: 10.43.30.5:0 - "POSINFO: 10.43.30.5:0 - "POST /v1/INFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 326.7 tokens/s, Avg generation throughput: 95.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 5.1%
135
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
136
+ INFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 268.5 tokens/s, Avg generation throughput: 101.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 5.9%
137
+ INFO: 10.43.30.3:0 - "INFO: 10.46.50.192:0 - "POST /v1/cINFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 853.6 tokens/s, Avg generation throughput: 97.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 7.9%
138
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
139
+ INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 627.9 tokens/s, Avg generation throughput: 121.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 7.6%
140
+ INFO: 10.46.17.192:0 - INFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 339.4 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 10.6%
141
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
142
+ INFO 01-04 14:15:46 [loggers.py:111] INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 336.0 tokens/s, Avg generation throughput: 105.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usagINFO: 10.43.30.3:0 - "POST /v1/coINFO: 10.43.30.4:0 - "PINFO 01-04 14:15:56 [loggers.py:111] INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 128.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:16:06 [loggers.py:11INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 435.4 tokens/s, Avg generation throughput: 115.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
143
+ INFO 01-04 14:16:16 [loggers.py:111] Engine 000: Avg prompt throughput: 528.1 tokens/s, Avg geneINFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 574.4 tokens/s, Avg generation throughput: 90.7 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
144
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
145
+ INFO: 10.46.17.192:0 - "INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
146
+ INFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 563.7 tokens/s, Avg generation throughput: 75.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, PrINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
147
+ INFO 01-04 14:16:36 [loINFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 296.2 tokens/s, Avg generation throughput: 36.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, PrefixINFO 01-04 14:16:46 [lINFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 461.9 tokens/s, Avg generation throughput: 56.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, PrefiINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
148
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
149
+ INFO 01-04 14:16:56 [logINFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 529.9 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache uINFO 01-04 14:16:INFO 01-04 14:17:06 [loINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
150
+ INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 368.6 tokens/s, Avg generation throughput: 85.2 tokens/s, INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1126.2 tokens/s,INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 578.2 tokens/s, Avg generation throughput: 62.1 tokens/s, RuINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
151
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
152
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO 01-04 14:17:26 INFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 596.8 tokens/s, Avg generation throughput: 58.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix INFO 01-04 14:17:36 [INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 683.6 tokens/s, Avg generation throughput: 71.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 9.8%
154
+ INFO: 10.43.30.4:0 - "POST /v1/completioINFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 171.9 tokensINFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 953.2 tokens/s, Avg generation throughput: 60.7 tokens/s, RunniINFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 665.4 tokens/s, Avg generation throughput: 59.1 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
155
+ INFO 01-04 14:18:06 [loggers.py:111] Engine 000: Avg prompt throughput: 308.4 tokens/s, Avg generation throughput: 104.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 13.2%
156
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
157
+ INFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 329.6 tokens/s, Avg generation throughput: 77.5 tokens/s, Running: 2 reqs, WaiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
158
+ ININFO 01-04 14:18:26 [loggers.py:111] Engine 000: Avg prompt throughput: 245.1 tokens/s, Avg generation throughput: 128.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 12.9%
159
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
160
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
161
+ INFO 01-04 14:18:36 [loggers.py:111] Engine 000: Avg prompt throughput: 483.9 tokens/s, Avg generation throughput: 116.6 tokens/s, Running: 2 reqs, WaINFO 01-04 14:18:37 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:18:46 [loggers.py:111] Engine 000: Avg prompt throughput: 513.4 tokens/s, Avg generation throughput: 123.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 13.3%
162
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
163
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
164
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:18:56 [loggers.py:111] Engine 000: Avg prompt throughput: 393.9 tokens/s, Avg generation throughput: 31.2 tokens/s, Running: 1 reqs,INFO 01-04 14:18:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:19:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs,INFO 01-04 14:19:07 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
165
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
166
+ INFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 727.4 tINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
167
+ INFO 01-04 14:19:17 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO 01-04 14:19:26 [loggers.py:111] Engine 000: Avg prompt throughput: 322.2 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqsINFO 01-04 14:19:27 [loggers.py:111] Engine 000: Avg prompt throughput: 63INFO 01-04 14:19:36 [loggers.py:111] Engine 000: Avg prompt throughput: 589.9 tokens/s, Avg generation throughput: 67.6 tokens/s, Running: 2 reINFO 01-04 14:19:37 [loggers.py:111] Engine 000: Avg prompt throughput: 437.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
168
+ INFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 287.7 tokens/s, Avg generation throughput: 108.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hitINFO 01-04 14INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
169
+ INFO 01-04 14:19:56 [loggers.py:111] Engine 000: Avg prompt throughput: 261.5 tokens/s, Avg generation throughput: 112.4 tokens/s, Running: 2INFO 01-04 14:19:57 [loggers.py:111] Engine 000: Avg prompt throughput: 600.4 tINFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput: 529.4 tokens/s, Avg generation throughput: 113.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hINFO 01-04 14:2INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
170
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ INFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 677.4 tokens/s, Avg generation throughput: 84.8 tokens/s, RunINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
172
+ INFO 01-04 14:20:17 INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
173
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
174
+ INFO 01-04 14:20:26 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
175
+ INFO 01-04 14:20:27 [loggers.py:111] Engine 000: Avg prompt throughput: 599.8 tokens/INFO 01-04 14:20:36 [loggers.py:111] Engine 000: Avg prompt throughput: 761.0 tokens/s, Avg generation throughput: 62.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cacINFO 01-04 14:20:37INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
176
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
177
+ INFO 01-04 14:20:46 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:20:47 [loggers.py:111] Engine 000: Avg prompt throughput: 634.7 tokens/s, Avg generation throughput: 54.4 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:20:56 [loggers.py:111] Engine 000: Avg prompt throughput: 266.2 tokens/s, Avg generation throughput: 73.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 12.5%
178
+ INFO: 10.45.190.192:0 - "POST /v1/compleINFO 01-04 14:20:58 [loINFO 01-04 14:21:06 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:21:07 [loggers.py:111] Engine 000: Avg prompt throughput: 625.0 tokens/s, Avg generation throughput: 111.1 tokens/s, Running: 2 reqs, WaitiINFO 01-04 14:21:16 [loggers.py:111] Engine 000: Avg prompt throughput: 549.0 tokens/s, Avg generation throughput: 74.0 tokens/s, RINFO 01-04 14:21:17 [loggers.py:111] Engine 000: Avg prompt throughput: 870.4 tokens/s, INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
179
+ INFO 01-04 14:21:26 [loggers.py:111] Engine 000: Avg prompt throughput: 635.4 tokens/s, Avg generation throughput: 101.0 tokens/s, INFO 01-04 14:21:27 [loggers.py:111] Engine 000: Avg prompt throughput: 541.1 tokens/s, AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
180
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
181
+ INFO 01-04 14:21:36 [loggers.py:111] Engine 000: Avg prompt throughput: 656.7 tokens/s, Avg generation throughput: 65.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.4%
182
+ INFO: 10.45.190.192:0 - "POST /v1/completINFO 01-04 14:21:3INFOINFO 01-04 14:21:46 [loggers.py:111] Engine 000: Avg prompt throughput: 637.5 tokens/s, Avg generation throughput: 30.7 tokensINFO 01-04 14:21:47 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.3:0INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKIINFO 01-04 14:21:56 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:21:57 [loggers.py:111] Engine 000: Avg prompt throughput: 632.1 tokens/s, Avg generation throughput: 70.0 tokens/s, RunninINFO 01-04 14:21:58 INFINFO 01-04 14:22:06 [loggers.py:111] Engine 000: Avg prompt throughput: 665.3 tokens/s, Avg generation throughput: 50.8 tokeINFO 01-04 14:22:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generINFO 01-04 14:22:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, INFO 01-04 14:22:17 [loggers.INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO 01-04 14:22:26 [loggers.py:111] Engine 000: Avg prompt throughput: 589.8 tokens/s, Avg generation throughput: 111.5 tokenINFO 01-04 14:22:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1509.7 tokens/s, Avg gINFO 01-04 14:22:36 [loggers.py:111] Engine 000: Avg prompt throughput: 672.7 tokens/s, Avg generation throughput: 96.9 tokensINFO 01-04 14:22:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg genINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
184
+ INFO 01-04 14:22:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 102.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, PrINFO: 10.43.30.3:0 - "PINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
185
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
186
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
187
+ INFO 01-04 14:22:56 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:22:57 [loggers.py:111] Engine 000: Avg prompt throughput: 810.9 tokens/s, Avg generation throughput: 65.9 tokens/s, Running: 2 reqs, WaitingINFO 01-04 14:23:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunnINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
188
+ INFO 01-04 14:23:07INFO 01-04 14:23:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1094.5 tokens/s, Avg generation throughput: 44.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 13.9%
189
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
190
+ INFO 01-04 14:23:36 [loggers.py:111] Engine 000: Avg prompt throughput: 699.4 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 13.7%
191
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
192
+ INFO 01-04 14:23:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 57.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 13.7%
193
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
194
+ INFO 01-04 14:23:56 [loggers.py:111] Engine 000: Avg prompt throughput: 715.5 tokens/s, Avg generation throughput: 41.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 13.4%
195
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
196
+ INFO 01-04 14:24:06 [loggers.py:111] Engine 000: Avg prompt throughput: 726.9 tokens/s, Avg generation throughput: 72.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 13.2%
197
+ INFO 01-04 14:24:16 [loggers.py:111] Engine 000: Avg prompt throughput: 883.9 tokens/s, Avg generation throughput: 42.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cache hit rate: 12.9%
198
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
199
+ INFO 01-04 14:24:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 55.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 12.9%
200
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
201
+ INFO 01-04 14:24:36 [loggers.py:111] Engine 000: Avg prompt throughput: 695.2 tokens/s, Avg generation throughput: 23.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.7%
202
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
203
+ INFO 01-04 14:24:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 26.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.7%
204
+ INFO 01-04 14:24:56 [loggers.py:111] Engine 000: Avg prompt throughput: 445.6 tokens/s, Avg generation throughput: 25.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 12.6%
205
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
206
+ INFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.6%
207
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
208
+ INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 728.3 tokens/s, Avg generation throughput: 18.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.4%
209
+ INFO 01-04 14:25:26 [loggers.py:111] Engine 000: Avg prompt throughput: 517.9 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 12.2%
210
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
211
+ INFO 01-04 14:25:36 [loggers.py:111] Engine 000: Avg prompt throughput: 602.2 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 12.0%
212
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
213
+ INFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 627.1 tokens/s, Avg generation throughput: 31.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 11.9%
214
+ INFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 11.9%
215
+ INFO 01-04 14:26:06 [loggers.py:111] Engine 000: Avg prompt throughput: 705.4 tokens/s, Avg generation throughput: 48.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hit rate: 11.7%
216
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
217
+ INFO 01-04 14:26:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 71.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 11.7%
218
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
219
+ INFO 01-04 14:26:26 [loggers.py:111] Engine 000: Avg prompt throughput: 538.3 tokens/s, Avg generation throughput: 54.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 11.6%
220
+ INFO 01-04 14:26:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 11.6%
221
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
222
+ INFO 01-04 14:26:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
223
+ INFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 574.7 tokens/s, Avg generation throughput: 38.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 11.4%
224
+ INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 724.1 tokens/s, Avg generation throughput: 68.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 11.2%
225
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
226
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
227
+ INFO 01-04 14:27:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.2%
228
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
229
+ INFO 01-04 14:27:26 [loggers.py:111] Engine 000: Avg prompt throughput: 800.4 tokens/s, Avg generation throughput: 26.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.0%
230
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
231
+ INFO 01-04 14:27:36 [loggers.py:111] Engine 000: Avg prompt throughput: 598.9 tokens/s, Avg generation throughput: 15.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.9%
232
+ INFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throughput: 769.5 tokens/s, Avg generation throughput: 10.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 11.9%
233
+ INFO 01-04 14:27:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 11.9%
234
+ INFO 01-04 14:28:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.9%
235
+ INFO 01-04 14:28:16 [loggers.py:111] Engine 000: Avg prompt throughput: 219.4 tokens/s, Avg generation throughput: 79.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 11.9%
236
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
237
+ INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 462.0 tokens/s, Avg generation throughput: 54.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 11.8%
238
+ INFO 01-04 14:28:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.8%
239
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
240
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
241
+ INFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.8%
242
+ INFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throughput: 875.8 tokens/s, Avg generation throughput: 10.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 11.6%
243
+ INFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 11.6%
244
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
245
+ INFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
246
+ INFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
247
+ INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 890.9 tokens/s, Avg generation throughput: 12.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 11.4%
248
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
249
+ INFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 34.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.4%
250
+ INFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.4%
251
+ INFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 974.1 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 11.2%
252
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
253
+ INFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 15.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.2%
254
+ INFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.2%
255
+ INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 640.3 tokens/s, Avg generation throughput: 26.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 11.0%
256
+ INFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 11.0%
257
+ INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 422.4 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 10.9%
258
+ INFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 10.9%
259
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
260
+ INFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 84.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 10.9%
261
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
262
+ INFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 11.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.9%
263
+ INFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 807.4 tokens/s, Avg generation throughput: 13.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 11.8%
264
+ INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.8%
265
+ INFO 01-04 14:32:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 11.8%
266
+ INFO 01-04 14:32:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 11.8%
267
+ INFO 01-04 14:32:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 11.8%
268
+ INFO 01-04 14:32:46 [loggers.py:111] Engine 000: Avg prompt throughput: 540.7 tokens/s, Avg generation throughput: 61.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.3%, Prefix cache hit rate: 11.7%
269
+ INFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cache hit rate: 11.7%
270
+ INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.0%, Prefix cache hit rate: 11.7%
271
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
272
+ INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 11.7%
273
+ INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 617.3 tokens/s, Avg generation throughput: 88.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.4%, Prefix cache hit rate: 11.5%
274
+ INFO 01-04 14:33:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.8%, Prefix cache hit rate: 11.5%
275
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
276
+ INFO 01-04 14:33:46 [loggers.py:1INFO 01-04 14:33:48 [loggers.py:111] Engine 000: Avg prompt throughput: 820.8 tokens/s, Avg generation throughput: 20.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:33:56 [loggers.py:111INFO 01-04 14:33:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:34:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:34:08 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 25.INFO 01-04 14:34:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:34:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0INFO 01-04 14:34:26 [loggers.py:111] Engine 000: Avg prompt throughput: 873.3 tokens/s, Avg generatiINFO 01-04 14:34:48 [loggers.py:111] Engine 000: Avg prompt throughput: 759.6 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 21.7%
277
+ INFO 01-04 14:34:58 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:35:03 [loggers.py:111] Engine 000: Avg prompt throughput: 925.1 tokens/s, Avg generation throughput: 36.8 tokens/s, Running: 1 reqs, Waiting: 0 rINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:35:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 19.3%
278
+ INFO 01-04 14:35:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 19.3%
279
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
280
+ INFO 01-04 14:35:33INFO 01-04 14:35:46 [loggers.py:111] Engine 000: Avg prompt throughput: 898.0 tokens/s, Avg generation throughput: 30.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 16.4%
281
+ INFO 01-04 14:35:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunINFO 01-04 14:36:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1207.6 tokens/s, Avg generation throughput: 26.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 16.0%
282
+ INFO 01-04 14:36:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 16.0%
283
+ INFO: 10.46.50.192:0 - "POSTINFO 01-04 14:36:56 [loggers.py:11INFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokensINFO 01-04 14:37:26 [loggers.py:111] Engine 000: Avg prompt throughput: 615.0 tokens/s, Avg generation throughput: 40.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.1%
284
+ INFO 01-04 14:37:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, RunINFO 01-04 14:38:16 [loggers.py:111] Engine 000: Avg prompt throughput: 521.8 tokens/s, Avg generation throughput: 38.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 15.9%
285
+ INFO 01-04 14:38:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.9%
286
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
287
+ INFO 01-04 14:38:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:38:46 [loggers.py:111] Engine 000: Avg prompt throughput: 471.6 tokens/s, Avg generation throughput: 8.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.9%
288
+ INFO 01-04 14:38:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, RunnINFO 01-04 14:40:36 [loggers.py:111] Engine 000: Avg prompt throughput: 1101.8 tokens/s, Avg generation throughput: 34.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 17.0%
289
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
290
+ INFO 01-04 14:40:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
291
+ INFO 01-04 14:40:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
292
+ INFO 01-04 14:42:16 [loggers.py:111] Engine 000: Avg prompt throughput: 698.6 tokens/s, Avg generation throughput: 32.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 16.8%
293
+ INFO 01-04 14:42:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 16.8%
294
+ INFO 01-04 14:42:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 16.8%
295
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
296
+ INFO 01-04 14:42:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
297
+ INFO 01-04 14:42:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.8%
298
+ INFO 01-04 14:44:26 [loggers.py:111] Engine 000: Avg prompt throughput: 443.6 tokens/s, Avg generation throughput: 23.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 16.7%
299
+ INFO 01-04 14:44:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 16.7%
300
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
301
+ INFO 01-04 14:44:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 23.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0INFO 01-04 14:45:26 [loggers.py:111] Engine 000: Avg prompt throughput: 728.0 tokens/s, Avg generation throughput: 22.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 13.3%
302
+ INFO: 10.46.50.192:0 - "PINFO 01-04 14:46:26 [loggers.py:111] Engine 000: Avg prompt throughput: 355.5 tokens/s, Avg generation throughput: 39.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 16.6%
303
+ INFO 01-04 14:46:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 16.6%
304
+ INFO 01-04 14:46:46 [loggers.py:11INFO 01-04 14:50:26 [loggers.py:111] Engine 000: Avg prompt throughput: 645.3 tokens/s, Avg generation throughput: 45.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 13.2%
305
+ INFO 01-04 14:50:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 13.2%
306
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
307
+ INFO 01-04 14:50:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.2%
308
+ INFO 01-04 14:50:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.2%
309
+ INFO 01-04 15:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 507.4 tokens/s, Avg generation throughput: 36.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 13.0%
310
+ INFO 01-04 15:05:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 13.0%
311
+ INFO 01-04 15:05:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 13.0%
312
+ INFO 01-04 15:05:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 13.0%
313
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
314
+ INFO 01-04 15:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.0%
315
+ INFO 01-04 15:06:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.0%
316
+ n throughput: 47.5 tokensINFO 01-04 15:04:43 [loggers.py:111] Engine 000: Avg prompt throughput: 503.2 tokens/s, Avg generation throughput: 29.2 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 15:08:27 [loggers.py:111] Engine 000: Avg prompt throughput: 530.6 tokens/s, Avg generation throughput: 15.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 12.8%
317
+ INFO 01-04 15:08:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 12.8%
318
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
319
+ INFO 01-04 15:08:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 14.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.8%
320
+ INFO 01-04 15:08:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.8%
321
+ 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
322
+ pt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 19.3%
323
+ INFO 01-04 15:02:28 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 19.3%
324
+ INFO 01-04 15:02:38 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 19.3%
325
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
326
+ INFO 01-04 15:02:48 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.3%
327
+ INFO 01-04 15:02:58 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.3%
hf_ip/vllm_gpu3.log ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:33 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:36 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:36 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8004, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:43 [config.py:717] This model supports multiple tasks: {'generate', 'score', 'classify', 'embed', 'reward'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:44 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:13:48 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:13:52 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:13:52 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f3dc054a8c0>
13
+ INFO 01-04 13:13:53 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:13:53 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:13:53 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:13:53 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 106.29 seconds
35
+ INFO 01-04 13:15:40 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 106.583423 seconds
36
+ INFO 01-04 13:15:54 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
37
+ INFO 01-04 13:15:54 [backends.py:430] Dynamo bytecode transform time: 14.34 s
38
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
39
+ INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 54.37 s
40
+ INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 68.72 s in total
41
+ INFO 01-04 13:17:53 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
42
+ INFO 01-04 13:17:53 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
43
+ INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
44
+ INFO 01-04 13:18:32 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.43 seconds
45
+ INFO 01-04 13:18:32 [core_client.py:439] Core engine process 0 ready.
46
+ WARNING 01-04 13:18:32 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
47
+ INFO 01-04 13:18:32 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
48
+ INFO 01-04 13:18:32 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
49
+ INFO 01-04 13:18:32 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8004
50
+ INFO 01-04 13:18:32 [launcher.py:28] Available routes are:
51
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
52
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /docs, Methods: HEAD, GET
53
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
54
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
55
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /health, Methods: GET
56
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /load, Methods: GET
57
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /ping, Methods: GET, POST
58
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /tokenize, Methods: POST
59
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /detokenize, Methods: POST
60
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/models, Methods: GET
61
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /version, Methods: GET
62
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
63
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/completions, Methods: POST
64
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/embeddings, Methods: POST
65
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /pooling, Methods: POST
66
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /score, Methods: POST
67
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/score, Methods: POST
68
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
69
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /rerank, Methods: POST
70
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/rerank, Methods: POST
71
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v2/rerank, Methods: POST
72
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /invocations, Methods: POST
73
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /metrics, Methods: GET
74
+ INFO: Started server process [1316452]
75
+ INFO: Waiting for application startup.
76
+ INFO: Application startup compININFOINFO 01-04 14:00:03 [loggers.py:111] Engine 000: Avg prompt throughput: 120.9 tokens/s, Avg generation throughput: 93.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit ratINININFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
77
+ INFO INFO 01-04 14:00:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 80.8 tokens/s, Running: 1 reqs, WaiIINFO 01-04 14:00:19 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:00:20 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:00:23 [loggers.py:111] Engine 000: Avg prompt throughput: 63.2 tokensINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 2INININFO 01-04 14:00:30 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:00:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqsININFO 01-04 14:00:40 [loggers.py:111] Engine 000: Avg prompt throughput: 61.5 tokens/s, Avg generation throughput: 107.7 tokens/s, RuINFO 01-04 14:00:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, WaitIINFO 01-04 14:00:50 [loggers.py:111] Engine 000: Avg prompt throughput: 54.3 tokens/s, Avg generation throughput: 118.6 tokens/s, RuINFO 01-04 14:00:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sININFO:INFO 01-04 14:00:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.6 tokeININFO 01-04 14:01:03 [loggers.py:111] Engine 000: Avg prompt throughput: 81.3 tokens/s, Avg generation throughput: 28.1 tokens/s, Running: 1 reqs, WaitiINFO INFO 01-04 14:01:07 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:01:13 [loggers.py:111] Engine 000: Avg prompt throughput: 100.0 tokens/s, Avg generation throughput: 64.3 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 141.3 tokenINFO 01-04 14:01:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.7 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:01:26 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:01:33 [loggers.py:111] Engine 000: Avg prompt throughput: 89.9 tokens/s, Avg generation throughput: 91.5 tokens/s, Running: 2 reqs, Waiting: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
78
+ INFO 01-04 14:01:43 [loggers.py:111] Engine 000: Avg prompt throughput: 81.1 tokens/s, Avg generation throughput: 131.3 tokens/s, Running: 2 reqs, Waiting: 0INFINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
79
+ INFO 01-04 14:01:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 88.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO: 10.45.190.192:0 - "POST /v1/completions HTTPINFO 01-04 14:02:03 [loggers.py:111] Engine 000: Avg prompt throughput: 163.9 tokens/s, Avg generation throughput: 93.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 0.7%
80
+ INFO: 1INFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg INFO 01-04 14:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 102.2 tokens/s, Avg generation throughput: 138.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throughput: 103.5 tokens/s, Avg generation throughput: 126INFO 01-04 14:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: INFO INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
81
+ INFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 97.1 tokens/s, Avg generation throughput: 95.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:02:43 [loggers.py:111] Engine 000: Avg prompt throughput: 96.5 tokens/s, Avg generation throughput: 107.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 0.5%
82
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
83
+ INFO 01-04 14:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 86.3 tokens/s, Avg generation throughput: 128.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 0.6%
84
+ INFO: 10.45.INFO 01-04 14:02:57 [loggers.py:111] Engine 000: AvINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO 01-04 14:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 81.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:03:07 [loggers.py:111] Engine 000: AvINFO 01-04 14:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 133.9 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPUINFO: 10.46.17.192:0 - "POST /v1/completions HTINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
86
+ INFO: INFO 01-04 14:03:20 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:03:23 [loggers.py:111] Engine 000: Avg prompt throughput: 134.6 tokens/s, Avg generation throughput: 80.3 tokens/s, Running: 1 reINFO 01-04 14:03:25 [loINFO 01-04 14:03:27 [loggers.py:111] Engine 000: AvgINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
87
+ INFO 01-04 14:03:33 [loggers.py:111] Engine 000: Avg prompt throughput: 129.5 tokens/s, Avg generatiINFO 01-04 14:03:37 [loggers.py:111] Engine 000: Avg prompt throughput: 88.4 tokens/s, Avg generation throughput: 87.1 tokens/sINFO 01-04 14:03:40 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:03:43 [loggers.py:111INFO 01-04 14:03:47 [loggers.py:111] Engine 000: Avg prompt throughput: 120.8 tokens/s, Avg generation throughput: 116.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usageINFO: 10INFO: 10.43.30.3:0 -INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
88
+ INFO 01-04 14:03:57 [loggers.py:111] Engine 000: Avg prompt throughput: 159.7 tokens/s, Avg generation throughput: 164.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg prompt throughput: 161.7 tokens/s, Avg generationINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
89
+ INFO 01-04 14:04:07 [loggers.py:111] Engine 000: AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
90
+ INFO: 1INFO 01-04 14:04:10 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:04:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:04:17 [loggers.py:111] Engine 000: Avg prompt INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 167.1 tokens/s, Avg generation thINFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg prompt throughput: 174.5 tokens/s, Avg generation throughputINFO 01-04 14:04:33 [loggers.py:111] Engine 000: Avg prompt throughput: 144.0 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 0.6%
91
+ INFO: 10.46.17.19INFO 01-04 14:04:37 [loggers.py:111] Engine 0INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
92
+ INFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt throughput: 195.8 tokens/s, Avg generation throughput: 52.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.46.17.192:0 - "POST /v1/compleINFO: 10.45.190.192INFO 01-04 14:04:47 [loggers.py:111] Engine INFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throughput: 171.5 tokens/s, Avg generation throughput: 34.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:04:57 [loggers.py:111] Engine 00INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
93
+ INFO 01-04 14:05:03 [loggers.py:111] Engine 000: Avg prompt throughput: 183.8 tokens/s, Avg generation throughput: 32.2 tokens/s, Running: 1 reqs, WaitiINFO: 10.46.17.1INFO 01-04 14:05:07 [loggers.py:111] Engine 00INFO 01-04 14:05:13 [loggers.py:111] Engine 000: Avg prompt throughput: 131.3 tokens/s, Avg generation throughput: 61.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO: 10.45.190.192:0 - "POST /v1/completIINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompt throughput: 204.0 tokens/s, Avg generation throughput: 70.4 tokens/s, Running: 2 reqs, WaitinINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
95
+ INFO 01-04 14:05:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 2.2%
96
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
97
+ INFO 01-04 14:05:43 [loggers.py:111] Engine 000: Avg prompt throughput: 163.2 tokens/s, Avg generation throughput: 68.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:05:47 [loggers.py:111] EnginINFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt throughput: 166.6 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 2.1%
98
+ INFO: 10.46.50.192:0 INFO 01-04 14:05:57 [loggers.py:111] EngiINFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 340.7 tokens/s, Avg generation throughput: 107.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 6.4%
99
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
100
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
101
+ INFO 01-04 14:06:13 [loggers.py:111] Engine 000: Avg prompt throughput: 152.7 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV INFO: 10.46.17.192:0 - "POST /v1/completionINFO: 10.45.190INFO 01-04 14:06:17 [loggers.py:111] Engine 000:INFO 01-04 14:06:23 [loggers.py:111] Engine 000: Avg prompt throughput: 168.2 tokens/s, Avg generation throughput: 82.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 9.1%
102
+ INFO 01-04 14:06:33 [loggers.py:111] Engine 000: Avg prompt throughput: 186.5 tokens/s, Avg generation throughput: 100.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 8.7%
103
+ INFO: 10.43INFO: 10.46.17.192:0 - "POST /v1/completions INFO 01-04 14:06:43 [loggers.py:111] Engine 000: Avg prompt throughput: 225.2 tokens/s, Avg generation throughput: 96.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 8.3%
104
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
106
+ INFO 01-04 14:06:53 [loggers.py:111] Engine 000: Avg prompt throughput: 212.9 tokens/s, Avg generation throughput: 138.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.43.30.5:0 - "POST /v1/completions HTTINFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughput: 225.8 tokens/s, Avg generation throughput: 101.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 7.6%
107
+ INFO: 10.INFO 01-04 14:07:07 [loggers.py:111] Engine 000: AvINFO 01-04 14:07:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 113.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.46.17.192:0 - "POST /v1/completions HINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
108
+ INFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput: 239.1 tokens/s, Avg generation throughput: 114.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 7.3%
109
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
110
+ INFO 01-04 14:07:33 [loggers.py:111] Engine 000: Avg prompt throughput: 454.7 tokens/s, Avg generation throughput: 121.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 6.7%
111
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
112
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO 01-04 14:07:43 [loggers.py:111] Engine 000: Avg prompt throughput: 242.1 tokens/s, Avg generation throughput: 95.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/INFO 01-04 14:07:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 6.5%
114
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
115
+ INFO 01-04 14:08:03 [loggers.py:111] Engine 000: Avg prompt throughput: 322.9 tokens/s, Avg generation throughput: 92.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 6.1%
116
+ INFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 234.5 tokens/s, Avg generation throughput: 109.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 5.9%
117
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
118
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
119
+ INFO 01-04 14:08:23 [loggers.py:111] Engine 000: Avg prompt throughput: 222.1 tokens/s, Avg generation throughput: 95.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 5.7%
120
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
121
+ INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 371.0 tokens/s, Avg generation throughput: 74.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 6.6%
122
+ INFO: 10.45.190.192:0 - "POST /v1/completions HINFO 01-04 14:08INFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 217.7 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache INFO: 10.43INFO 01-04 14:08:53 [loggers.py:111] Engine 000: Avg prompt throughput: 333.3 tokens/s, Avg generation throughput: 115.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 6.1%
123
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
124
+ INFO 01-04 14:09:03 [loggers.py:111] Engine 000: Avg prompt throughput: 207.6 tokens/s, Avg generation throughput: 123.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache hit rate: 6.0%
125
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
126
+ INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 6.0%
127
+ INFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 251.7 tokens/s, Avg generation throughput: 124.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 5.8%
128
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
129
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
130
+ INFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 306.6 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit INFO: 1INFO 01-04 14:09:43 [loggers.py:111] Engine 000: Avg prompt throughput: 357.1 tokens/s, Avg generation throughput: 110.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 7.5%
131
+ INFO:INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
132
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
133
+ INFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 309.2 tokens/s, Avg generation throughput: 116.7 tokens/s, Running: 1 reqs, Waiting: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200INFO 01-04 14:10:03 [loggers.py:111] Engine 000: Avg prompt throughput: 379.1 tokens/s, Avg generation throughput: 72.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 9.5%
134
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
135
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
136
+ INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 379.0 tokens/s, Avg geINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
137
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 20INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
138
+ INFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 321.2 tokens/s, Avg generation throughput: 64.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 13.5%
139
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
140
+ INFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 475.5 tokens/s, Avg generation throughput: 57.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 12.9%
141
+ INFINFO 01-04 14:10:37 [loggers.py:111] Engine 000: AvINFO: 10INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 292.6 tokens/s, Avg generation throughput: 44.1 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 INFO: 10INFO 01-04 14:10:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
142
+ INFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:11:03 [loggers.py:111] Engine 000: Avg prompt throughput: 350.3 tokens/s, Avg generation throughput: 3.3 tokens/s, Running: 1 reqs, WaitiINFO 01-04 14:11:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:11:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:11:17 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
143
+ INFO 01-04 14:11:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.7 tokens/s, Running: 0 reqs, WaitiINFO 01-04 14:11:27 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:11:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, WaitiINFO 01-04 14:11:37 [loggers.py:111] INFO 01-04 14:11:39 [loggers.pINFO 01-04 14:11:53 [loggers.py:111] Engine 000: Avg prompt throughput: 257.2 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 12.7%
144
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
145
+ INFO 01-04 14:12:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.7%
146
+ INFO 01-04 14:12:13 [loggers.py:111] Engine 000: Avg prompt throughput: 209.4 tokens/s, Avg generation throughput: 1.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 12.5%
147
+ INFO 01-04 14:12:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 12.5%
148
+ INFO 01-04 14:12:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 12.5%
149
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
150
+ INFO 01-04 14:12:43 [loggers.py:111] Engine 000: Avg prompt throughput: 725.6 tokens/s, Avg generation throughput: 86.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.3%
151
+ INFO 01-04 14:12:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 12.3%
152
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO 01-04 14:13:03 [loggers.py:111] Engine 000: Avg prompt throughput: 232.7 tokens/s, Avg generation throughput: 85.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.1%
154
+ INFO 01-04 14:13:13 [loggers.py:111] Engine 000: Avg prompt throughput: 172.1 tokens/s, Avg generation throughput: 120.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 11.9%
155
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
156
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
157
+ INFO 01-04 14:13:23 [loggers.py:111] Engine 000: Avg prompt throughput: 288.1 tokens/s, Avg generation throughput: 124.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 11.7%
158
+ INFO: 10.46.17.192:0 - "POST /v1/completionINFO 01-04 14:13:25INFO 01-04 14:13:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 54.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cacheINFO: 10.46.1INFO 01-04 14:13:43 [loggers.py:111] Engine 000: Avg prompt throughput: 393.7 tokens/s, Avg generation throughput: 73.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 11.3%
159
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
160
+ INFO 01-04 14:13:53 [loggers.py:111] Engine 000: Avg prompt throughput: 376.6 tokens/s, Avg generation throughput: 114.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 12.2%
161
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
162
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
163
+ INFO 01-04 14:14:03 [loggers.py:111] Engine 000: Avg prompt throughput: 306.6 tokens/s, Avg generation throughput: 65.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3INFO: 10.43.30.3:0 - "POST /INFO 01-04 14:14:13 [loggers.py:111] Engine 000: Avg prompt throughput: 914.3 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 11.2%
164
+ INFO: 10.46.17.192:0 - "POST /v1/compINFO 01-04 14:14:15 [loggINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
165
+ INFO 01-04 14:14:23 [loggers.py:111] Engine 000: Avg prompt throughput: 387.3 tokens/s, Avg generation throughput: 110.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:14:26 [loggers.py:111] Engine 000: Avg prompt throughput: 712.5 tokens/s, Avg generation tINFO 01-04 14:14:33 [loggers.py:111] Engine 000: Avg prompt throughput: 323.5 tokens/s, Avg generation throughput: 58.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache uINFO 01-04 14:14:36 [loggers.py:111] EngiINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
166
+ INFO 01-04 14:14:43 [loggers.py:111] Engine 000: Avg prompt throughput: 299.0 tokens/s, Avg generation throughput: 92.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:14:46 [loggers.py:111] Engine 000: Avg prompt throughput: 402.2 tokens/s, Avg generation throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
167
+ INFO 01-04 14:14:53 [loggers.py:111] Engine 000: Avg prompt throughput: 226.2 tokens/s, Avg generation througINFO: 10.45.1INFO 01-04 14:14:55 [loggers.py:111] Engine 000: AINFO 01-04 14:14:56 [loggers.py:111] EngineINFO 01-04 14:15:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/sINFO 01-04 14:15:05 [loggers.py:111] Engine 000: INFO 01-04 14:15:06 [loggers.py:111] EnginINFO 01-04 14:15:13 [loggers.py:111] Engine 000: Avg prompt throughput: 752.4 tokens/s, Avg generation throughput: 75.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 12.6%
168
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
169
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
170
+ INFO 01-04 14:15:23 [loggers.py:111] Engine 000: Avg prompt throughput: 477.7 tokens/s, Avg generation throughput: 89.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO: 10.43.30.4:0INFO: 10.43.30.5INFO: 10.43.30.5:0INFO 01-04 14:15:26 [loggers.py:111] EnginINFO 01-04 14:15:33 [loggers.py:111] Engine 000: Avg prompt throughput: 428.7 tokens/s, Avg generation throughput: 65.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix caINFO 01-04 14:15:35 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ INFO 01-04 14:15:43 [loggers.py:111] Engine 000: Avg prompt throughput: 466.7 tokens/s, Avg generation throughput: 91.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:15:46 [loggers.py:111] EngineINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
172
+ INFO 01-04 14:15:53 [loggers.py:111] Engine 000: Avg prompt throughput: 354.8 tokens/s, Avg generation throughput: 102.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
173
+ INFO 01-04 14:15:56 [loggers.py:111] Engine 000: Avg prompt throughput: 358.2 tokens/s, Avg generation throughINFO 01-04 14:16:03 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:16:05 [loggers.py:111] Engine 000INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
174
+ INFO 01-04 14:16:06 [loggers.py:111] Engine INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
175
+ INFO 01-04 14:16:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:16:16 [loggers.py:111] Engine INFO 01-04 14:16:23 [loggers.py:111] Engine 000: Avg prompt throughput: 384.0 tokens/s, Avg generation throughput: 39.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, PrefINFO 01-04 14:16:25 [loggINFO 01-04 14:16:33 [loggers.py:111] Engine 000: Avg prompt throughput: 905.5 tokens/s, Avg generation throughput: 82.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 15.6%
176
+ INFO: 10.46.17.192:0 - "POST /v1/compINFO 01-04 14:16:35 [loggINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
177
+ INFO 01-04 14:16:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 72.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:16:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO 01-04 14:16:49 [loggerINFO 01-04 14:16:53 [loggers.py:111] Engine 000: Avg prompt throughput: 352.9 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
178
+ INFO 01-04 14:16:56 [loggers.py:111] Engine INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
179
+ INFO 01-04 14:17:03 [loggers.py:111] Engine 000: Avg prompt throughput: 176.1 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
180
+ INFO 01-04 14:17:06 [loggers.py:111] Engine INFO 01-04 14:17:13 [loggers.py:111] Engine 000: Avg prompt throughput: 378.8 tokens/s, Avg generation throughput: 52.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:17:16 [loggers.py:111] Engine INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
181
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
182
+ INFO 01-04 14:17:23 [loggers.py:111] Engine 000: Avg prompt throughput: 384.4 tokens/s, Avg generation throINFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 437.5 tokens/s, Avg generation throughpuINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO 01-04 14:17:33 [loggers.py:111] Engine 000: Avg prompt throughput: 390.2 tokens/s, Avg generation throughput: 50.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:17:36 [loggers.py:111] EnginINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
184
+ INFO 01-04 14:17:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 40.9 tokens/s, RunnINFO 01-04 14:17:45 [loggers.py:111] Engine 00INFO 01-04 14:17:46 [loggers.py:111] EnINFO 01-04 14:17:53 [loggers.py:111] Engine 000: Avg prompt throughput: 1134.6 tokens/s, Avg generation throughput: 39.0 tokens/s, RunINFO 01-04 14:17:55 [loggers.py:111] Engine 00INFO 01-04 14:17:56 [loggers.py:111] EngINFO: 10.46.17.192:0 - "POST /v1INFO 01-04 14:17:59 [loggers.pINFO 01-04 14:18:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 71.3 tokens/s, RunINFO 01-04 14:18:05 [loggers.py:111] Engine 000:INFO 01-04 14:18:06 [loggers.py:111] EINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
185
+ INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:18:15 [loggers.py:111] Engine 000: Avg prompt throughput: 470.8 tokens/s, Avg generation throughput:INFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 397.0 tokens/s, Avg generationINFO 01-04 14:18:23 [loggers.py:111] Engine 000: Avg prompt throughput: 497.0 tokens/s, Avg generation throughput: 21INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
186
+ INFO: 10.43.30.3:0 - "POST /v1/comINFO: 10.46.17.192:0 - "POST /v1/coINFO 01-04 14:18:29 [loggerINFO 01-04 14:18:33 [loggers.py:111] Engine 000: Avg prompt throughput: 758.9 tokens/s, Avg generation throughput: 31.3 tokens/s, RunninINFO 01-04 14:18:35 [loggers.py:111] Engine 000: Avg prompt throughput: 421.0 tokenINFO 01-04 14:18:43 [loggers.py:111] Engine 000: Avg prompt throughput: 251.6 tokens/s, Avg generation throughput: 78.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cacINFO: 10.46.50.INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
187
+ INFO 01-04 14:18:53 [loggers.py:111] Engine 000: Avg prompt throughput: 613.8 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cacINFO 01-04 14:18:55INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
188
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
189
+ INFO 01-04 14:19:03 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
190
+ INFO 01-04 14:19:05 [loggers.py:111] Engine 000: Avg prompt throughput: 686.9 tokens/INFO: 10.46.17.192:0 - "POSTINFO 01-04 14:19:09 [loggers.py:11INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
191
+ INFOINFO 01-04 14:19:15 [loggers.py:111] Engine 000: Avg prompt throughput: 520.1 tokens/s, Avg generation througINFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 449.5 tokens/s, Avg generation thrINFO 01-04 14:19:23 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:19:25 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
192
+ INFO 01-04 14:19:26 [loggers.py:111] EnINFO 01-04 14:19:33 [loggers.py:111] Engine 000: Avg prompt throughput: 457.7 tokens/s, Avg generation throughput: 69.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cacheINFO 01-04 14:19:35 [loggers.py:111] Engine INFO 01INFO 01-04 14:19:39 [loggers.py:INFO 01-04 14:19:43 [loggers.py:111] Engine 000: Avg prompt throughput: 290.5 tokens/s, Avg generation throughput: 54.8 tokens/s, RunninINFO 01-04 14:19:45 [loggers.py:111] EnginINFO 01-04 14:19:46 [loggers.py:111] EngiINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
193
+ INFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:19:55 [loggers.py:111] Engine 000: Avg prompt throughput: 567.7 tokens/s, Avg generation througINFO 01-04 14:19:56 [loggers.py:111] EINFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 757.3 tokens/s, Avg generation throughput: 73.4 tokens/s, Running:INFO 01-04 14:20:05 [loggers.py:111] Engine 000: Avg prompt throughput: 696.2 tokens/s, Avg generation throughINFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:20:09 [loggers.pINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
194
+ INFO 01-0INFO 01-04 14:20:15 [loggers.py:111] Engine 000: Avg prompt throughput: 399.1 tokens/s, Avg generation througINFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 580.1 tokens/s, Avg generatioINFO 01-04 14:20:23 [loggers.py:111] Engine 000: Avg prompt throughput: 458INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
195
+ INFO 01-04 14:20:25 [loggers.py:111] EngiINFO 01-04 14:20:26 [loggers.py:111] INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
196
+ INFO 01-04 14:20:33 [loggers.py:111] Engine 000: Avg prompt throughput: 560.8 tokens/s, Avg generation throughput: 56.7 tokens/s, Running: 1INFO 01-04 14:20:35 [loggers.py:111] EngineINFO: 10.46.50.192:0 - "POST /v1INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
197
+ INFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throughput: 7INFO 01-04 14:20:45 [loggers.py:111] Engine 000: Avg prompt throughput: 547.0 tokens/s, Avg generation throughput: 30.1 tokens/s, Running: 1 reqs,INFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 643.3 tokens/s, Avg generation throughput: 54.1 tokens/s, Running: INFO 01-04 14:20:55 [loggers.py:111] Engine 000: Avg prompt throughput: 680.4 toINFO 01-04 14:21:03 [loggers.py:111] Engine 000: Avg prompt throughput: 599.1 tokens/s, Avg generation throughput: 104.5 tokens/s, Running:INFO 01-04 14:21:05 [loggers.py:111] Engine 000: Avg prompt throughput: 608.1 tokINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
198
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
199
+ INFO 01-04 14:21:13 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO 01-04 14:21:15 [loggers.py:111] Engine 000: Avg prompt throughput: 862.1 tokens/s, Avg generation throughput: 60.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache INFO 01-04 14:21:16 [loggers.py:111] EnginINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" INFO 01-04 14:21:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:21:25 [loggers.py:111] Engine 000: Avg prompt throughput: 485.5 tokens/s, Avg generation throughINFO 01-04 14:21:26 [loggers.py:1INFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg prompt throughput: 617.INFO 01-04 14:21:35 [loggers.py:111] Engine 000: Avg prompt throughput: 426.4 tokens/s, Avg generation throughINFO 01-04 14:21:36 [loggers.py:1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
200
+ INFO 01-04INFO 01-04 14:21:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:21:46 [loggers.py:111INFO 01-04 14:21:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
201
+ INFO 01-04 14:21:55 [loggers.py:111] Engine 000: Avg prompt throughput: 1617.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
202
+ INFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt throughput: 555.2 tokens/s, Avg generation throughput: 54.1 tokens/s, Running: 1INFO 01-04 14:22:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokINFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 731.4 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cacheINFO 01-04 14:22:INFO 01-04 14:22:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 16.0%
203
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
204
+ INFO 01-04 14:22:33 [loggers.py:111] Engine 000: Avg prompt throughput: 604.4 tokens/s, Avg generation throughput: 101.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit rate: 17.1%
205
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTINFO 01-04 14:2INFO 01-04 14:22:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hINFO 01-04 14:2INFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg prompt throughput: 510.3 tokens/s, Avg generation throughput: 69.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 16.9%
206
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rINFO 01-04 INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
207
+ INFO 01-04 14:23:13 [loggers.py:111] Engine 000: Avg prompt throughput: 341.0 tokens/s, Avg generation throughput: 52.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit ratINFO: INFO 01-04 14:23:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1349.9 tokens/s, Avg generation throughput: 83.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.1%, Prefix cache hit rate: 16.9%
208
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
209
+ INFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 108.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 16.9%
210
+ INFO: 10.46.17.192:0 - "POST INFO 01-04 14:23:36 [loggers.py:1INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 877.1 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hitINFO 01-04 14:23:45 [loggers.py:111] Engine 00INFO 01-04 14:23:46 [loggers.py:1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
211
+ INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 697INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
212
+ INFO 01-04 14:23:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 toINFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 312.1 tokens/s, Avg generation throughput: 62.8 tokens/s, Running: INFO 01-04 14:24:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1436.9 tINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
213
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
214
+ INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:24:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughpuINFO 01-04 14:24:16 [loggers.pyINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
215
+ INFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1238.8 tokens/s, Avg generation throughput: 40.0 tokens/s, Running: 1 INFO: 10.43.30.5:0 - "POST /v1/completiINFO 01-04 14:24:26 [loggers.py:111INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.INFO: 10.46.50.192:0 - "POST INFO 01-04 14:24:43 [loggers.py:111] Engine 000: Avg prompt throughput: 714.5 tokens/s, Avg generation throughput: 80.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hINFO 01-04 14:24:45 [loggers.py:111] Engine 000: Avg prompt throughput: 760.1 tINFO 01-04 14:24:53 [loggers.py:111] Engine 000: Avg prompt throughput: 584.7 tokens/s, Avg generation throughput: 84.8 tokens/s, Running: 2INFO 01-04 14:24:55 [loggers.py:111] Engine 000: Avg prompt throughput: 736.1 tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
216
+ INFO 01-04 14:25:03 [loggers.py:111] Engine 000: Avg prompt throughput: 366.1 tokens/s, Avg generation throughput: 93.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2INFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
217
+ INFO 01-04 14:25:13 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:25:15 [loggers.py:111] Engine 000: INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 681.3 tokens/s, Avg geneINFO 01-04 14:25:23 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:25:25 [loggers.py:111] Engine 000: Avg prompt throughput: 349.5 tokens/s, Avg generation throughput: 4INFO 01-04 14:25:26 [loggers.py:INFO 01-04 14:25:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:25:35 [loggers.py:111] Engine 000: Avg prompt throughput: 778.8 tokens/s, Avg generation throughput: 8INFO 01-04 14:25:36 [loggers.py:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
218
+ INFO INFO 01-04 14:25:45 [loggers.py:111] Engine 000: Avg prompt throughput: 786.8 tokens/s, Avg generation throughput: 8INFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
219
+ INFO: 10.45.190.192:0 - "POST /v1/completiINFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 600.3 tokens/s, Avg generaINFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg prompt throughput: 1210.4 tokens/s, Avg generation throughput: 56.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.2%, Prefix cache hit INFO 01-04 1INFO 01-04 14:26:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 83.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit INFO 01-04 1INFO 01-04 14:26:23 [loggers.py:111] Engine 000: Avg prompt throughput: 768.1 tokens/s, Avg generation throughput: 128.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 12.0%, Prefix cacheINFO 01-04 14:26:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
220
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
221
+ INFO 01-04 14:26:33 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:26:35 [loggers.py:111] Engine 000: Avg prompt throughput: 731.0 tokens/s, Avg generation throughputINFO 01-04 14:26:36 [loggers.py:111] INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
222
+ ININFO 01-04 14:26:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:26:46 [loggers.py:111] EngINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 INFINFO 01-04 14:26:55 [loggers.py:111] Engine 000: Avg prompt throughput: 582.9 tokens/s, Avg generation throughINFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 624.1 tokens/s, Avg generation througINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 20INFO 01-04 14:27:03 [loggers.py:111] Engine 000:INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 587.0 tokens/s, Avg generation throughput: 59.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:27:13 [loggers.py:111] Engine 000: AvINFO: 10.46.50.19INFO 01-04 14:27:15 [loggers.py:111] Engine 0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
223
+ INFO 01-04 14:27:16 [loggers.py:111]INFO 01-04 14:27:23 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:27:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, WaiINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
224
+ INFO 01-04 14:27:33 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:27:35 [loggers.py:111] Engine 000: Avg prompt throughput: 802.7 tokens/s, Avg generation throughput:INFO 01-04 14:27:36 [loggers.py:11INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
225
+ INFO 01-04 14:27:43 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:27:45 [loggers.py:111] Engine 000: AINFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-04 14:27:53 [loggers.py:111] Engine 000: Avg prompt throughput: 829.6 tokens/s, Avg generation throughput: 31.2 tokens/s, RunningINFO 01-04 14:27:55 [loggers.py:111] Engine 00INFO 01-04 14:27:56 [loggers.py:111]INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
226
+ INFO 01-04 14:28:03 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:28:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: INFO 01-04 14:28:06 [loggers.py:1INFO 01-04 14:28:13 [loggers.py:111] Engine 000: Avg prompt throughput: 615.0 tokens/s, Avg generation throughput: 14.0 tokens/s, RunningINFO 01-04 14:28:15 [loggers.py:111] Engine 000INFO 01-04 14:28:16 [loggers.py:111INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
227
+ INFO 01-04 14:28:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:28:25 [loggers.py:111] Engine 000INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:28:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs,INFO 01-04 14:28:35 [loggers.py:111] Engine INFO 01-04 14:28:36 [loggers.INFO 01-04 14:28:43 [loggers.py:111] Engine 000: Avg prompt throughput: 885.1 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: INFO 01-04 14:28:45 [loggers.py:111] EngineINFO 01-04 14:28:46 [loggers.INFO 01-04 14:28:53 [loggers.py:111] Engine 000: Avg prompt throughput: 635.7 tokens/s, Avg generation throughput: 4.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%,INFO 01-04 14:28:56 [loggers.pINFO 01-04 14:29:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%INFO 01-04 14:29:06 [loggers.pyINFO 01-04 14:29:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, INFO 01-04 14:29:16 [loggers.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
228
+ INFO 01-04 14:29:23 [loggers.py:111] Engine 000: Avg prompt throughput: 669.3 tokens/s, Avg generation throughput: 48.5 tINFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
229
+ INFO 01-04 14:29:33 [loggers.py:111] Engine 000: Avg INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
230
+ INFO 01-04 14:29:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-04 14:29:43 [loggers.py:111] Engine 000: Avg prompt throughput: 700.5 tokens/s, Avg generation throughput: 25.4INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 838.5 tokens/s, Avg generatiINFO 01-04 14:29:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tINFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:30:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 toINFO 01-04 14:30:05 [loggers.py:111] EngiINFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-04 14:30:13 [loggers.py:111] Engine 000: Avg prompt throughput: 614.1 tokens/s, Avg generation throughput: 20.4INFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO 01-04 14:30:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1149.3 tokens/s, Avg generatiINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
231
+ INFO 01-04 14:30:33 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:30:43 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
232
+ INFO 01-04 14:30:53 [loggers.py:111] Engine 000: Avg INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 875.2 tokens/s, Avg generation throughput: 20.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:31:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokeINFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generINFO 01-04 14:31:13 [loggers.py:111] Engine 000: Avg prompt throughput: 779.9 tokens/s, Avg generation throughput: 29.9 toINFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:31:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tINFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 799.2 tokens/s, Avg generation throughput: 18.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:31:33 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:31:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokINFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:31:53 [loggers.py:111] Engine 000: Avg prompt throughput: 688.5 tokens/s, Avg generation throughput: 34.5 INFO 01-04 14:31:56 [loggINFO 01-04 14:32:05 [loggers.py:111] Engine 000: Avg prompt throughput: 650.2 tokens/s, Avg generation throughput: 41.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, PrefINFO 01-04 14:32:06 [loggINFO 01-04 14:32:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, PrefINFO 01-04 14:32:16 [loggINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
233
+ INFO 01-04 14:32:25 [loggers.py:111] Engine 000: Avg prompt throughput: 544.9 tokens/s, Avg generation throughput: 52.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, PINFO 01-04 14:32:26 [loggersINFO 01-04 14:32:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:32:39 [loggers.py:111] Engine 000: AvgINFO 01-04 14:32:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%,INFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 710.3 tokens/s, Avg generation throughput: 33.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.6%
234
+ INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 15.6%
235
+ INFO: 10.45.190.192:0 - "POST /v1/compleINFO 01-04 14:33:49 [loggers.py:111] Engine 000: Avg prompt throughput: 684.6 tokens/s, Avg generation throughput: 9.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7INFO 01-04 14:33:53 [loggers.py:INFO 01-04 14:33:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%INFO 01-04 14:34:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:34:09 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 12.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%INFO 01-04 14:34:13 [loggers.pyINFO 01-04 14:34:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%,INFO 01-04 14:34:23 [loggers.pINFO 01-04 14:34:59 [loggers.py:111] Engine 000: Avg prompt throughput: 779.7 tokens/s, Avg generation throughput: 44.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 17.1%
236
+ INFO 01-04 14:35:09 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 17.1%
237
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
238
+ INFO 01-04 14:35:19 [loggers.py:111] Engine 000:INFO 01-04 14:35:25 [loggers.py:111] Engine 000: Avg prompt throughput: 608.8 tokens/s, Avg generation throughput: 24.4 tokens/s, Running: INFO 01-04 14:35:33 [loggers.py:111] Engine 000: Avg INFO 01-04 14:35:36 [loggers.py:111] Engine 000: Avg prompt throughput: 796.6 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:35:43 [loggers.py:111] Engine 000: Avg INFO 01-04 14:35:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO: 10.46.50.192:0 - "POINFO: 10.43.30.4:0 INFO 01-04 14:35:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 17.3%
239
+ INFO 01-04 14:36:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 17.3%
240
+ INFO: 10INFO 01-04 14:36:43 [loggers.py:111] Engine 000: Avg prompt throughput: 291.8 tokens/s, Avg generation throughput: 32.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 16.0%
241
+ INFO 01-04 14:36:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 16.0%
242
+ INFO 01-04 14:37:03 [loggers.py:111] Engine 000: AvgINFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1216.2 tokens/s, Avg generation throughput: 40.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, INFO 01-04 14:37:13 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:37:16 [loggers.py:111] INFO 01-04 14:37:29 [loggers.py:111] Engine 000: Avg prompt throughput: 297.7 tokens/s, Avg generation throughput: 18.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 17.3%
243
+ INFO 01-04 14:37:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 17.3%
244
+ INFO 01-04 14:37:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 17.3%
245
+ INFO 01-04 14:37:59 [loggerINFO 01-04 14:38:16 [loggers.py:111] Engine 000: Avg prompt throughput: 363.3 tokens/s, Avg generation throughput: 28.4 tokens/s, Running: 1 reqs, Waiting: 0 reqINFO 01-04 14:38:23 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:38:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geneINFO 01-04 14:38:36 [loggers.py:111] Engine 000: Avg prompt throughput: 624.1 tokens/s, Avg generation throughput: 34.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%,INFO 01-04 14:38:36 [loggers.pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
246
+ INFO 01-04 14:38:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.1 tokINFO 01-04 14:38:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO 01-04 14:38:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokINFO 01-04 14:39:26 [loggers.py:111] Engine 000: Avg prompt throughput: 649.9 tokens/s, Avg geneINFO 01-04 14:40:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1023.4 tokens/s, Avg generation throughput: 12.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 16.2%
247
+ INFO 01-04 14:40:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 16.2%
248
+ INFO 01-04 14:41:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 16.2%
249
+ INFO 01-04 14:41:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.INFO 01-04 14:42:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1721.1 tokens/s, Avg generation throughput: 23.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.9%, Prefix cache hit rate: 17.7%
250
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
251
+ INFO 01-04 14:42:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
252
+ INFO 01-04 14:42:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
253
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
254
+ INFO 01-04 14:45:56 [loggers.py:111] Engine 000: Avg prompt throughput: 1917.4 tokens/s, Avg generation throughput: 34.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
255
+ INFO 01-04 14:46:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
256
+ INFO 01-04 14:53:06 [loggers.py:111] Engine 000: Avg prompt throughput: 410.1 tokens/s, Avg generation throughput: 42.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 18.8%
257
+ INFO 01-04 14:53:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 18.8%
258
+ INFO 01-04 14:53:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.8%
259
+ INFO 01-04 14:53:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 18.8%
260
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
261
+ INFO 01-04 14:53:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 11.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
262
+ INFO 01-04 14:53:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
263
+ INFO 01-04 15:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 513.0 tokens/s, Avg generation throughput: 24.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 18.6%
264
+ INFO 01-04 15:06:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 18.6%
265
+ INFO 01-04 15:06:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO 01-04 15:07:55 [loggers.py:111] Engine 000: Avg prompt throughput: 524.6 tokens/s, Avg generation throughput: 38.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 19.4%
266
+ INFO 01-04 15:08:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 19.4%
267
+ INFO 01-04 15:08:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 19.4%
268
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
269
+ INFO 01-04 15:08:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 37.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
270
+ INFO 01-04 15:08:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.4%
271
+ Avg generation throughput: 33.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
272
+ INFO 01-04 15:07:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
273
+ py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 16.7%
274
+ INFO 01-04 15:02:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.7%
275
+ INFO 01-04 15:02:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.7%
276
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
277
+ INFO 01-04 15:03:09 [loggers.py:111] Engine 0INFO 01-04 15:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 553.8 tokens/s, Avg generation throughput: 12.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 12.5%
278
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
279
+ INFO 01-04 15:10:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.5%
280
+ INFO 01-04 15:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 12.5%
hf_ip/vllm_gpu4.log ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:36 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:38 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:38 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8005, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:46 [config.py:717] This model supports multiple tasks: {'score', 'reward', 'generate', 'embed', 'classify'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:47 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:13:51 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:13:54 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:13:55 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f17117328f0>
13
+ INFO 01-04 13:13:56 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:13:56 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:13:56 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:13:56 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 104.27 seconds
35
+ INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 104.572266 seconds
36
+ INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
37
+ INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.47 s
38
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
39
+ INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.37 s
40
+ INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.84 s in total
41
+ INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
42
+ INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
43
+ INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
44
+ INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.77 seconds
45
+ INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
46
+ WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
47
+ INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
48
+ INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
49
+ INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8005
50
+ INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
51
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: GET, HEAD
52
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: GET, HEAD
53
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: GET, HEAD
54
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: GET, HEAD
55
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
56
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
57
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: POST, GET
58
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
59
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
60
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
61
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
62
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
63
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
64
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
65
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
66
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
67
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
68
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
69
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
70
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
71
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
72
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
73
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
74
+ INFO: Started server process [1317292]
75
+ INFO: Waiting for application startup.
76
+ INFO: Application startup compININFO 01-04 14:00:00 [loggers.py:111] Engine 000: Avg prompt throughput: 71.4 tokens/s, Avg generation throughput: 37.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.0IIINFO 01-04 14:00:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 0.0ININFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
77
+ INFO 01-04 14:00:20 [loggers.py:111] Engine 000: Avg prompt throughput: 58.3 tokens/s, Avg generation throughput: 48.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.3%, Prefix cache hit rate: 0INFIINFO 01-04 14:00:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.0ININFO 01-04 14:00:40 [loggers.py:111] Engine 000: Avg prompt throughput: 122.0 tokens/s, Avg generation throughput: 96.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 0.6%
78
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:00:50 [loggers.py:111] Engine 000: Avg prompt throughput: 63.6 tokens/s, Avg generation throughput: 100.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 1.0%
79
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.INFO 01-INFO 01-04 14:00:57 [loggers.py:111] Engine 000: Avg prompt tINFO: INFO 01-04 14:01:00 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 80.3 tokens/s, Running: 1 rINFO 01-04 14:01:05 [loggers.py:111] Engine 000: Avg prompt throughput: 6INFINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
80
+ INFO 01-04 14:01:10 [loggers.py:111] Engine 000: Avg prompt throughput: 66.4 tokens/s, Avg generation throughput: 74.0 tokens/s, Running:INFO 01-04 1INFO 01-04 14:01:17 [loggers.py:111] Engine 000: Avg prompt throughIINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
81
+ INFO 01-04 14:01:20 [loggers.py:111] Engine 000: Avg prompt throughput: 85.7 tokens/INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1"INFO 01-INFO INFO 01-04 14:01:27 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:01:30 [loggers.py:111] Engine 000: Avg prompt throughput: 104.9 tokens/s, Avg generation throughput: 65.9 tokens/s, Running: 2 reqs, WINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
82
+ INFO 0INFO 01-04 14:01:40 [loggers.py:111] Engine 000: Avg prompt throughput: 95.6 tokens/s, Avg generation throughput: 113.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hINFO: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1ININFO 01INFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14INFO 01-04 14:01:50 [loggers.py:111] Engine 000: Avg prompt throughput: 83.3 tokens/s, Avg generation throughput: 90.9 tokens/s, Running: 2 reqsINFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt throughput: 75.7 tokens/s, Avg generation throughput: 85.5 tokens/s, Running: 2 rINFO 01-04 14:02:0INFO 01-04 14:02:06 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:02:07 [loggers.py:111] Engine 000: Avg prompt throughput: 81.INFO 01-04 14:02:07 [loggers.py:111] Engine 000: INFO 01-04 INFO 01-04 14:02:10 [loggers.py:111] Engine 000: Avg prompt throughput: 1INFO 01-0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
83
+ INFO 01-0INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throughput: 73.3 tokens/s, Avg generation throughput: 89.4INFO 01-0INFO 01-04 14:02:20 [loggers.py:111] Engine 000: Avg prompt throughput: INFO: INFO 01-04INFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput: 83.8 tokens/s, Avg generation throughput: 122.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usageINFO: INFO 01-04INFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 87.INFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt throughput: 118.8 tokens/s, Avg generation throughput: 81.1 tokens/s, RunINFO 01-04INFO 01-04 14:02:43 [loggers.py:111] Engine 000: Avg prompt throughput: 105.1 tokens/s, Avg generation throughput: 151.5 tokens/s,INFO 01-0INFO 01-04 14:02:47 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:02:50 [loggers.py:111] Engine 000: Avg prompt throughput: 101.4 tokens/s, Avg generation throughput: 91.6 tokens/s, Running: 2 reqsINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 INFO 01-04 14:02:56 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:03:00 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 70.3 tokens/s, Running: 1INFO 01-04 14:03:05 [loggers.py:111] Engine 000: Avg prompt throughput: 111.1 IINFO 01-04 14:03:07 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:03:10 [loggers.py:111] Engine 000: Avg prompt throughput: 67INFO 01-04 14:03:15 [loggers.py:111] Engine 000: Avg prompt throughput: 73.1 tokens/s, Avg generation throughput: 125.7 tokens/s, Running: 3 reINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
84
+ INFO 01-04 14:03:20 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:03:25 [loggers.py:111] Engine 000: Avg prompt throughput: 101.4 tokens/s, Avg generation throughput: 149.7 tokens/s, Running: 3 rIININFO 01-04 14:03:30 [loggers.py:111] Engine 000: Avg prompt throughput: 93.3 tokens/s, Avg generation throughput: 74.1 tokens/s, Running: 2INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO 01-0INFINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
86
+ INFO 01-04 14:03:40 [loggers.py:111] Engine 000: Avg prompt throughput: 119.4 tokens/s, Avg generation throughput: 105.1 tokens/s, Running: 2 reqs, WaIINFO 01-04 14:03:47 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
87
+ INFO 01-04 14:03:50 [loggers.py:111] Engine 000: Avg prompt throughput: 134.9 tokensINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"INFO: INFO 01-04 14:03:57 [loggers.py:111] Engine 000: Avg prompt throughput: 114.2 tokens/s, Avg generation throughput: 100.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cINFO 01-04 14:04:00 [loggers.py:111] Engine 000: Avg prompt throughput: 141.3 INFO 0INFO 01-04 14:04:07 [loggers.py:111] Engine 000: Avg prompt throughput: 160.8 tokens/s, Avg generation throughput: 90.1 tokens/s, RunnINFO 01-04 14:04:10 [loggers.py:111] Engine 000: Avg prompt throughput: 172.8 tokens/s, Avg generation throughput: 57.7 tokens/s, Running: 2 rINFO 01-04 14:04:15 [loggers.py:111] Engine 000: Avg prompt throughput: INININFO 01-04 14:04:20 [loggers.py:111] Engine 000: Avg prompt throughput: 120.0 tokens/s, Avg generation throughput: 100.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hitINFO: 10INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/INFO 01-0INFO 01-04 14:04:27 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:04:30 [loggers.py:111] Engine 000: Avg prompt throughput: 224.8 tokens/s, Avg generation throughput: 95.4 tokens/s, Running: 2 reqs,INFO INFO 01-04 14:04:37 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
88
+ INFO 01-04 14:04:40 [loggers.py:111] Engine 000: Avg prompt throughput: 174.6 tokens/s, Avg generation throughput: 92.4 tokens/s, Running: 2 reIINFO 01-INFO 01-04 14:04:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6INFO: 10.45.190INFO 01-04 14:04:50 [loggers.py:111] Engine 000: Avg prompt throughput: 175.2INFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throINFO 01-INFO 01-04 14:04:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
89
+ INFO: 1INFO 01-04 14:05:03 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
90
+ INFO 01-04 14:05:07 [loggers.py:111] Engine 000: Avg prompt throughput: 138.2 tokens/s, Avg generation throughput: INFO 01-04 14:05:07 [lINFO 01-04 14:05:10 [loggers.py:111] Engine 000: Avg prompt throughput: 195.9 tokINFO 01-04 14:05:17 [loggers.py:111] Engine 000: Avg prompt throughput: 163.3 tokens/s, Avg generation throughput: 83.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usageINFO 01-04 14:05:17 [INFO 01-04 14:05:20 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 1INFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 147.4 tokens/s, Avg generation throughput:INFO 01-04 14:05:27 [loggers.py:111] Engine 000: Avg prompt throughput: 148.2 tokens/sINFO 01-04 14:05:3INFO: 10.43.30.5:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:05:37 [loggers.py:111] Engine 000: Avg prompt throughput: 121.6 tokens/s, Avg generation throughput: 103.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix caINFO 0INFO: 10.4INFO 01-04 14:05:47 [loggers.py:111] Engine 000: Avg prompt throughput: 144.5 tokens/s, Avg generation throughput: 114.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
91
+ INFO 01-04 14:05:50 [logINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
92
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
93
+ INFO 01-04 14:05:57 [loggers.py:111] Engine 000: Avg prompt throughput: 191.0 tokens/s, Avg generation throughput: 100.5 tokens/s, RunninINFOINFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 194.0 tINFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 209.2 tokens/s, Avg generation throughput: 76.9 tokens/s, Running: INFO 01-04 14:06:07 [loggers.py:111] Engine 000: Avg prompt throughput: 210.9 toINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO 01-04 14:06:17 [loggers.py:111] Engine 000: Avg prompt throughput: 226.9 tokens/s, Avg generation throughput: 88.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1INFO 01-04 14:06:20 [loggers.py:111] Engine 000: Avg prompt throughput: 505.5 tokens/s, Avg generaINFO 01-04 14:06:27 [loggers.py:111] Engine 000: Avg prompt throughput: 175.3 tokens/s, Avg generation throughput: 86.4 INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
95
+ INFO: 10.46.17.192:0 - "POST /vINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
96
+ INFO 01-04 14:06:37 [loggers.py:111] Engine 000: Avg prompt throughput: 178.1 tokens/s, Avg generation throughput: 107.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14INFO: 10.45.190.192:0 - "POST /v1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
97
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
98
+ INFO 01-04 14:06:47 [loggers.py:111] Engine 000: Avg prompt throughput: 244.1 tokens/s, Avg generation throughput: INFO 01-04 14:06:50 [loggers.py:111] Engine 000: Avg prompt throughput: 229.6 tokens/s, Avg generation throughput: 10.1 tokens/s, Running: 1 reqsINFO: 10.46.17.192:0INFO 01-04 14:06:57 [loggers.py:111] Engine 000: INFO 01-04 14:07:00 [loggers.py:111] Engine 000: Avg prompt throughput: 511.7 tokens/s, Avg generation throughput: 87.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:07:07 [loggers.py:111] Engine 000: Avg prompt throughput: 264.4 tokens/s, Avg generation throughput:INFO 01-04 14:07:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
99
+ INFO: 10.45.190.192:0 - "POST /v1/completiINFO: 10.46.17.19INFO 01-04 14:07:17 [loggers.py:111] Engine 0INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
100
+ INFO 01-04 14:07:20 [loggers.py:111] Engine 000: Avg prompt throughput: 570.5 tokens/s, Avg generation throughINFO: 10.43.30.4:0 - "POST /v1/comINFO 01-04 14:07:25 [loggeINFO 01-04 14:07:27 [loggers.py:111] Engine 00INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
101
+ INFO 01-04 14:07:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokeINFOINFO: 10.43.30.5:0 - INFO 01-04 14:07:37 [loggers.py:111] Engine 000: Avg prompt throughput: 250.0 tokens/s, Avg generation throughput: 64.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:07:40 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
102
+ INFO: 10.43.30.5:0 - "POST /v1/coINFO 01-04 14:07:45 [loggerINFO 01-04 14:07:47 [loggers.py:111] Engine 000: Avg prompt throughput: 201.9 tokens/s, Avg generation thrINFO 01-04 14:07:50 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:07:55 [loggerINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
103
+ INFO 01-04 14:07:57 [loggers.py:111] EngINFO 01-04 14:08:00 [loggers.py:111] Engine 000: Avg prompt throughput: 412.5 tokens/s, Avg generation throughput: 79.1 tokens/s, Running: 3 reqs, WaitiINFO: 10.43.30.4:0 - "PINFO 01-04 14:08:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO 01-04 14:08:10 [loggers.py:111] Engine 000: Avg prompt throughput: 160.1 tokens/s, Avg generation throughput: 102.0 tokens/s, Running: 3 reqs, WaitINFO: 10.46.50.192INFO: 10.46.50.192:0 - "POST /v1/completINFO 01-04 14:08:13 [lINFO 01-04 14:08:17 [loggers.py:111] Engine 000INFO 01-04 14:08:20 [loggers.py:111] Engine 000: Avg prompt throughput: 309.3 tokens/s, Avg generation throughput: 107.1 tokens/s, Running: 3 reqs, WINFO: 10.43.30.3:0 INFO 01-04 14:08:27 [loggers.py:111] Engine 000: Avg prompt throughput: 274.5 tokens/s, Avg generation throughput:INFO 01-04 14:08:30 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
104
+ INFO: 10.46.17.192:0 - "POST /v1/comINFO 01-04 14:08:35 [loggeINFO 01-04 14:08:37 [loggers.py:111] Engine 000: AINFO 01-04 14:08:40 [loggers.py:111] Engine 000: Avg prompt throughput: 281.3 tokens/s, Avg generation throughput: 87.1 tokens/s, Running: 2 reqsINFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 334.6 tokens/s, AvINFO 01-04 14:08:47 [loggers.py:111] Engine 000: AINFO 01-04 14:08:50 [loggers.py:111] Engine 000: Avg prompt throughput: 484.9 tokens/s, Avg generation throughput: 93.2 tokens/s, Running: 3 reqINFO 01-04 14:08:53 [loggINFO: 10.43.30.4:0 - "POST /v1/coINFO: 10.46.50.192:0 - INFO 01-04 14:08:57 [loggers.py:111] Engine 000: Avg prompt throughput: 349.3 tokens/s, Avg generation throughput: 52.8 INFO 01-04 14:09:00 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:09:03 [loggerINFO 01-04 14:09:07 [loggers.py:111] Engine 000: Avg prompt throughput: 224.3 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO:INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 341.7 tokens/s, Avg geINFO 01-04 14:09:17 [loggers.py:111] Engine 000: Avg prompt throughput: 169.0 tokens/s, Avg generation throughput: 54.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.INFOINFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 409.8 tokens/s, Avg geneINFO 01-04 14:09:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 70.7 tINFINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 265.8 tokens/s, Avg generINFO 01-04 14:09:37 [loggers.py:111] Engine 000: Avg prompt throughput: 268.1 tokens/s, Avg generation throughput: 25.3INFINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
106
+ INFO 01-04 14:09:43 [loggers.py:1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
107
+ INFO 01-04 14:09:47 [loggers.py:111] Engine 000: Avg prompt throughput: 188.7 tokens/s, Avg generation throughput: 68.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 209.5 tokens/s, Avg generatioINFO 01-04 14:09:57 [loggers.py:111] Engine 000: Avg prompt throughput: 450.7 tokens/s, Avg generation throughput: 55.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-04 14:10:03 [loggers.py:111] EINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
108
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
109
+ INFO 01-04 14:10:07 [loggers.py:111] Engine 000: AINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
110
+ INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 336.4 tokens/s, Avg generation INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
111
+ INFO 01-04 14:10:17 [loggers.py:111] Engine 000: Avg prompt throughput: 386.2 tokens/s, Avg generation throughput:INFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 253.9 tokens/s, Avg generation thINFO 01-04 14:10:27 [loggers.py:111] Engine 000: Avg prompt throughput: 228.2 tokens/s, Avg generation throughput: 37.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:10:33 [loggers.py:111] EngINFO 01-04 14:10:37 [loggers.py:111] Engine 000: Avg prompt throughput: 340.4 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache uINFO:INFO 01-04 14:10:40 [loggers.py:111]INFO: 10.46.50.192:0 -INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
112
+ INFO 01-04 14:10:43 [loggers.py:111] EnginINFO 01-04 14:10:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 38INFO 01-04 14:10:50 [loggers.py:111] Engine 000: Avg prompt tINFO: 10.46.50.192:0 - "POST /v1/compINFO 01-04 14:10:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.INFO 01-04 14:11:00 [loggers.py:111] Engine 000: Avg prompt throughput: 193.1 tokens/s, Avg generatioINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO 01-04 14:11:07 [loggers.py:111] Engine 000: AvgINFO 01-04 14:11:10 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.0 tokeINFO 01-04 14:11:13 [loggers.py:111] Engine 0INFO 01-04 14:11:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.43.30.5:0 - "POST /v1/completioINFO 01-04 14:11:47 [loggers.py:111] Engine 000: Avg prompt throughput: 424.8 tokens/s, Avg generation throughput: 37.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 13.3%
114
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
115
+ INFO 01-04 14:11:57 [loggers.py:111] Engine 000: Avg prompt throughput: 307.4 tokens/s, Avg generation throughput: 16.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:12:03 [loggers.py:111] Engine 000: AvINFO 01-04 14:12:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:12:13 [loggers.py:111] Engine 000: AINFO 01-04 14:12:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:12:23 [loggers.py:111] Engine 000: AvINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
116
+ INFO 01-04 14:12:27 [loggers.py:111] Engine 000: Avg prompt throughput: 517.1 tokens/s, Avg generation throughput: 102.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPINFO 01-04 14:12:33 [loggers.py:111] Engine 000: Avg INFO 01-04 14:12:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 95.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 12.4%
117
+ INFO: 10.INFO: 10.46.50.192:0 - "POST /v1/completions HTTPINFO: 10.INFO 01-04 14:12:43 [loggers.py:111] Engine 000: Avg INFO 01-04 14:12:47 [loggers.py:111] Engine 000: Avg prompt throughput: 293.8 tokens/s, Avg generation throughput: 52.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:12:53 [loggers.py:111] Engine 000: Avg pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
118
+ INFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, INFO 01-04 14:13:03 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 225.2 tokens/s, Avg generation throughput: 16.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:13:13 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 215.6 tokens/s, Avg generation throughput: 70.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.INFO: INFO 01-04 14:13:23 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 13.2%
119
+ INFO: 10INFO 01-04 14:13:33 [loggers.py:111] Engine 000: Avg pINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
120
+ INFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 662.8 tokens/s, Avg generINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
121
+ INFO 01-04 14:13:43 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 294.4 tokens/s, Avg generation throughput: 58.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:13:53 [loggers.py:111] Engine 000: Avg prompt throughput: 412.0 tokens/s, Avg generation throughput: 80.9 toINFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
122
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
123
+ INFO 01-04 14:14:03 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 995.4 tokens/s, Avg generation throughput: 60.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
124
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
125
+ INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 437.8 tokens/s, Avg generation INFO 01-04 14:14:23 [loggers.py:111] Engine 000: Avg prompt throughput: 556.7 tokens/s, Avg generation throughput: 1INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 293.1 tokens/s, Avg generation tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
126
+ INFO: 10.46.17.192:0 - "POST /v1/completions INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
127
+ INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 382.0 tokens/s, Avg generation throughput: 89.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 12.1%
128
+ INFO: 10.46.1INFO: 10.43.30.4:0 - "POST /v1/completions HTINFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 519.8 tokens/s, Avg generation throughput: 68.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 12.8%
129
+ INFO: 10.46INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
130
+ INFO 01-04 14:14:53 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 328.9 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:15:03 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 266.3 tokens/s, Avg generation throughput: 60.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 12.3%
131
+ INFO: INFO 01-04 14:15:13 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 277.3 tokens/s, Avg generation throughput: 84.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 12.1%
132
+ INFO: INFO 01-04 14:15:23 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 217.5 tokens/s, Avg generation throughput: 101.9 tokens/s, Running: 2 reqs, Waiting: INFO 01-04 14:15:33 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
133
+ INFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg prompt throughput: 472.3 tokens/s, Avg generation throughput: 107.2 tokens/s, Running: 2 reqs, WaitiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
134
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
135
+ INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 443.4 tokens/s, Avg generation throughput: 113.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 13.8%
136
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
137
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
138
+ INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 688.7 tokens/s, Avg generation throughput: 112.5 tokens/s, Running: 1 reqs, WaitiINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
139
+ IINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
140
+ INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 682.3 tokens/s, Avg generation throughput: 80.1 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:16:13 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.6 tokens/s, Running: 2 reqs, WaINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
141
+ INFO 01-04 14:16:23 [loggers.py:111] Engine 000: Avg prompt throughput: 442.8 tokens/s, Avg generation throughput: 55.0 tokens/s, RunnINFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 396.1 tokens/INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
142
+ INFO 01-04 14:16:33 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 455.7 tokens/s, Avg generation throughput: 76.7 tokens/s, Running: 2 reqs, WaINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
143
+ INFO 01-04 14:16:43 [loggers.py:111] Engine 000: Avg prompt throughput: 736.4 tokens/s, Avg generation throughput: 67.5 tokens/s, RunniINFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 371.7 tokenINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
144
+ INFO 01-04 14:16:53 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 706.2 tokens/s, Avg generation throughput: 78.8 tokens/s, Running: 2 reqs, INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
145
+ INFO 01-INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
146
+ INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 375.2 tokens/s, Avg generation throughput: 95.7 tokens/s, Running: 2 reqs, INFO 01-04 14:17:13 [loggers.py:111] Engine 000: Avg prompt throughput: 587.3 tokens/s, Avg generation throughput: 48.8 tokens/s, Running: INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 509.2 toINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
147
+ INFO 01-04 14:17:23 [loggers.py:111] Engine 000: Avg prompt throughput: 299INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
148
+ INFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 728.8 INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
149
+ INFO 01-04 14:17:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 261.6 tokens/s, Avg generation throughput: 109.0 tokens/s, Running: 3 INFO 01-04 14:17:43 [loggers.py:111] Engine 000: Avg prompt throughput: 526.8 INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
150
+ INFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 66INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
151
+ INFO 01-04 14:17:53 [loggers.py:111] Engine 000: Avg prompt throughput: 520.6 tokens/s, Avg generation throughput: 106.3 tokens/s, Running: 2 reqINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
152
+ INFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughput: 68INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
154
+ INFO 01-04 14:18:03 [loggers.py:111] Engine 000: Avg prompt throughput: 424.7 tokens/s, Avg generation throughput: 60.6 tokens/s, Running: 0 reqINFO 01-04 14:18:07 [loggers.py:111] Engine 000: Avg prompt throughput: 293INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
155
+ INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughput: 717.0 tokens/s, Avg generation throughput: 23.1 tokens/s, Running: 0 rINFO 01-04 14:18:17 [loggers.py:111] Engine 000: Avg prompt throughput: 523.IINFO 01-04 14:18:23 [loggers.py:111] Engine 000: Avg prompt throughput: 406.4 tokens/s, Avg generation throughput: 4.8 tokens/s, Running: 1 reINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
156
+ INFO 01-04INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
157
+ INFO 01-04 14:18:33 [loggers.py:111] Engine 000: Avg prompt throughput: 543.4 tokens/s, Avg generation throughput: 48.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit raINFO 01-0INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
158
+ INFO 01-04 14:18:43 [loggers.py:111] Engine 000: Avg prompt throughput: 531.7 tokens/s, Avg generation throughput: 56.5 tokens/s, Running: 1 reqINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
159
+ INFO 01-04INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
160
+ INFO 01-04 14:18:53 [loggers.py:111] Engine 000: Avg prompt throughput: 655.8 tokens/s, Avg generation throughput: 44.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit raINFO 01-0INFO 01-04 14:19:03 [loggers.py:111] Engine 000: Avg prompt throughput: 624.2 tokens/s, Avg generation throughput: 62.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit ratINFO: INFO 01-04 14:19:13 [loggers.py:111] Engine 000: Avg prompt throughput: 702.0 tokens/s, Avg generation throughput: 117.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 10.0%
161
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:19:23 [loggers.py:111] Engine 000: Avg prompt throughput: 710.5 tokens/s, Avg generation throughput: 115.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.8%, Prefix cache hit rate: 9.7%
162
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
163
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"INFO 01-INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
164
+ INFO 01-04 14:19:33 [loggers.py:111] Engine 000: Avg prompt throughput: 356.9 tokens/s, Avg generation throughput: 68.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rateINFO 01INFO 01-04 14:19:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 9INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
165
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
166
+ INFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughput: 549.1 tokens/s, Avg generation throughput: 45.5 tokens/s, Running: 0 reqs, WINFO 01-04 14:19:57 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 580.3 tokens/s, Avg generation throughput: 27.8 tokens/s, Running: 1 reqs, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
167
+ INFO INFO 01-04 14:20:13 [loggers.py:111] Engine 000: Avg prompt throughput: 510.5 tokens/s, Avg generation throughput: 63.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 9INFO 01-04 14:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1"INFO 01-04 14:20:23 [loggers.py:111] Engine 000: Avg prompt throughput: 472.4 tokens/s, Avg generation throughput: 91.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: INFO INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
168
+ INFO 01-04 14:20:33 [loggers.py:111] Engine 000: Avg prompt throughput: 724.7 tokens/s, Avg generation throughput: 108.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 8.8ININFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
169
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
170
+ INFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throughput: 567.8 tokens/s, Avg generation throughput: 74.5 tokens/s, Running: 1 reqs, WaINFO 01-04 14:20:47 [loggers.py:111] Engine 000: Avg prompt throughput: 500.0 tokens/s, Avg generation throughput: 88.6 tokens/s, RunniINFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 278.0 tokensINFO 01-04 14:20:57 [loggers.py:111] Engine 000: Avg prompt throughput: 799.4 tokens/s, Avg generation throughput: 95.8 tokens/s, RunniINFO 01-04 14:21:03 [loggers.py:111] Engine 000: Avg prompt throughput: 307.5 tokens/s, Avg generation throughput: 67.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 9.9INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ ININFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
172
+ INFO 01-04 14:21:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 32.7 tokens/s, Running: 0 reqs, Waiting:INFO 01-04 14:21:17 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:21:23 [loggers.py:111] Engine 000: Avg prompt throughput: 639.5 tokens/s, Avg generation throughput: 40.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 9.7%
173
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
174
+ INFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg prompt throughput: 431.7 tokens/s, Avg generation throughput: 52.3 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:21:37 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:21:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 9.6%
175
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
176
+ INFO 01-04 14:21:53 [loggers.py:111] Engine 000: Avg prompt throughput: 533.9 tokens/s, Avg generation throughput: 73.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 9.4%
177
+ INFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt throughput: 376.2 tokens/s, Avg generation throughput: 69.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 9.3%
178
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
179
+ INFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 72.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 9.3%
180
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
181
+ INFO 01-04 14:22:23 [loggers.py:111] Engine 000: Avg prompt throughput: 502.6 tokens/s, Avg generation throughput: 63.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 9.2%
182
+ INFO: INFO 01-04 14:22:27 [loggers.py:111] Engine 000: Avg pINFO 01-04 14:22:33 [loggers.py:111] Engine 000: Avg prompt throughput: 740.4 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:22:37 [loggers.py:111] Engine 000: Avg prompt throughput: 845.2 tokens/s, Avg generation throughput: 72.8INFO 01-04 14:22:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO 01-04 14:22:47 [loggers.py:111] Engine 000: AvgINFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg prompt throughput: 697.8 tokens/s, Avg generation throughput: 0.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 8.9%
184
+ INFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 8.9%
185
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
186
+ INFO 01-04 14:23:13 [loggers.py:111] Engine 000: Avg prompt throughput: 830.0 tokens/s, Avg generation throughput: 30.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 8.7%
187
+ INFO 01-04 14:23:23 [loggers.py:111] Engine 000: Avg prompt throughput: 188.6 tokens/s, Avg generation throughput: 84.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 8.6%
188
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
189
+ INFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 822.6 tokens/s, Avg generation throughput: 95.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 8.6%
190
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
191
+ INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 641.4 tokens/s, Avg generation throughput: 86.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 8.4%
192
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
193
+ INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 483.3 tokens/s, Avg generation throughput: 93.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 8.3%
194
+ INFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 496.9 tokens/s, Avg generation throughput: 100.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.1%, Prefix cache hit rate: 8.2%
195
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
196
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
197
+ INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 111.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 8.2%
198
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
199
+ INFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1449.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache hit rate: 8.0%
200
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
201
+ INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 8.0%
202
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
203
+ INFO 01-04 14:24:43 [loggers.py:111] Engine 000: Avg prompt throughput: 581.9 tokens/s, Avg generation throughput: 49.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:24:47 [loggers.py:111] Engine 00INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
204
+ INFO 01-04 14:24:53 [loggers.py:111] Engine 000: Avg prompt throughput: 759.9 tokens/s, Avg generation throuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
205
+ INFO 01-04 14:24:57 [loggers.py:111] Engine INFO 01-04 14:25:03 [loggers.py:111] Engine 000: Avg prompt throughput: 728.7 tokens/s, Avg generation throughput: 50.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 7.6%
206
+ INFO: 10.46.17.192INFO 01-04 14:25:07 [loggers.py:111] Engine INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
207
+ INFO 01-04 14:25:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughINFO 01-04 14:25:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughINFO 01-04 14:25:23 [loggers.py:111] Engine 000: Avg prompt throughput: 686.7 tokens/s, Avg generation throughput: 36.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:25:27 [loggers.py:111] EnginINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
208
+ INFO 01-04 14:25:33 [loggers.py:111] Engine 000: Avg prompt throughput: 893.4 tokens/s, Avg generation throughpINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
209
+ INFO 01-04 14:25:37 [loggers.py:111] EngINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
210
+ INFO 01-04 14:25:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: INFO 01-04 14:25:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:25:53 [loggers.py:111] Engine 000: Avg prompt throughput: 353.3 tokens/s, Avg generation throughput:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
211
+ INFO 01-04 14:25:57 [loggers.py:111] EngINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
212
+ INFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg prompt throughput: 358.7 tokens/s, Avg generation throughputINFO 01-04 14:26:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO 01-04 14:26:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
213
+ INFO 01-04 14:26:17 [loggers.py:111] EINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
214
+ INFO 01-04 14:26:23 [loggers.py:111] Engine 000: Avg prompt throughput: 786.4 tokens/s, Avg generation throughput: INFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:26:33 [loggers.py:111] Engine 000: Avg prompt throughput: 885.5 tokens/s, Avg generation throughput:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
215
+ INFO 01-04 14:26:37 [loggers.py:111] Engine 000: Avg prompt throughput: 210.6 tokens/s, Avg generation thINFO 01-04 14:26:43 [loggers.py:111] Engine 000: Avg prompt throughput: 816.2 tokens/s, Avg generation throughput:INFO 01-04 14:26:47 [loggers.py:111] Engine 000: Avg prompt throughput: 546.2 tokens/s, Avg generation throughput: 55.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:26:53 [loggers.py:111] Engine 000: AvINFO 01-04 14:26:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:27:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tINFO 01-04 14:27:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
216
+ INFO 01-04 14:27:13 [loggers.py:111] Engine 000: Avg INFO 01-04 14:27:17 [loggers.py:111] Engine 000: Avg prompt throughput: 643.6 tokens/s, Avg generation throughput: 89.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
217
+ INFO 01-04 14:27:23 [loggers.py:111] Engine 000: AvINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
218
+ INFO 01-04 14:27:27 [loggers.py:111] Engine 000: Avg prompt throughput: 655.0 tokens/s, Avg generation INFO 01-04 14:27:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
219
+ INFO 01-04 14:27:37 [loggers.py:11INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
220
+ INFO 01-04 14:27:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.2INFO 01-04 14:27:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatioINFO 01-04 14:27:53 [loggers.py:111] Engine 000: Avg prompt throughput: 673.6 tokens/s, Avg generation throughput: 29INFO 01-04 14:27:57 [loggers.py:111] Engine 000: Avg prompt throughput: 895.3 tokens/s, Avg generatioINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
221
+ INFO 01-04 14:28:03 [loggers.py:111] Engine 000: AINFO 01-04 14:28:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:28:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.INFO 01-04 14:28:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO 01-04 14:28:23 [loggers.py:111] Engine 000: Avg prompt throughput: 1382.2 tokens/s, Avg generation throughput: 3INFO 01-04 14:28:27 [loggers.py:111] Engine 000: Avg prompt throughput: 985.3 tokens/s, Avg generation throughput: 35.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:28:33 [loggers.py:111] Engine 000: AINFO 01-04 14:28:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPUINFO 01-04 14:28:43 [loggers.py:111] Engine 000: Avg prompt throughput: 686.6 tokens/s, Avg generation throughput: 3INFO 01-04 14:28:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:28:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46INFO 01-04 14:28:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1087.4 tokens/s, Avg generatiINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
222
+ INFO 01-04 14:29:03 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 666.7 tokens/s, Avg generation throughput: 44.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:29:13 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.0 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO 01-04 14:29:23 [loggers.py:111] Engine 000: Avg promINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
223
+ INFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1102.8 tokens/s, Avg geINFO 01-04 14:29:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, PreINFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO 01-04 14:29:43 [loggers.py:111] Engine 000: Avg prompt throughput: 877.5 tokens/s, Avg generation throughput: 63.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prompt throughput: 481.5 tokens/s, Avg generation throughpuINFO 01-04 14:29:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 60.3 tokensINFO 01-04 14:29:57 [loggers.py:111] Engine 000: AINFO 01-04 14:29:57 [loggers.py:111] EnginINFO 01-04 14:30:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cacheINFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg prompt throughput: 886.6 tokens/s, Avg generation throuINFO 01-04 14:30:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughputINFO 01-04 14:30:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:30:23 [loggers.py:111] Engine 000: Avg prompt throughput: 459.3 tokens/s, Avg generation throughputINFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
224
+ INFO 01-04 14:30:33 [loggers.py:111] Engine 00INFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompt throughput: 662.1 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:30:43 [loggers.py:111] Engine 000: Avg prompt throughput: 899.9 tokens/s, Avg generation throughputINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
225
+ INFO 01-04 14:30:47 [loggers.py:111] EngiINFO 01-04 14:30:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.6 tokens/s, RuINFO 01-04 14:30:57 [loggers.py:111] Engine 00INFO 01-04 14:30:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:31:03 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.43.30.4:0 - "POST /v1/completioINFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg prompt throughput: 731.0 tokens/s, Avg generation INFO 01-04 14:31:13 [loggers.py:111] Engine 000: Avg prompt throughput: 520.4 tokens/s, Avg generation throughput: 1INFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:31:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunnINFO 01-04 14:31:27 [loggers.py:111] Engine 000: INFO 01-04 14:31:27 [loggers.py:111INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
226
+ INFO 01-04 14:31:33 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:31:37 [loggers.py:111] Engine 000:INFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt throughput: 434.5 tokens/s, Avg generatiINFO 01-04 14:31:43 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:31:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, WINFO 01-04 14:31:53 [loggers.py:111] Engine 000: Avg prompt throughput: 962.1 tokens/s, Avg generation throughput: 34.4 INFO: 10.46.5INFO 01-04 14:31:57 [loggers.py:111] Engine 000: INFO 01-04 14:31:57 [loggers.py:1INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
227
+ INFO 01-04 14:32:03 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.4:0 - "POST /v1/completions INFO 01-04 14:32:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatioINFO 01-04 14:32:13 [loggers.py:111] Engine 000: Avg prompt throughput: 597.0 tokens/s, Avg generation throughput: 39INFO 01-04 14:32:37 [loggers.py:111] Engine 000: Avg prompt throughput: 694.9 tokens/s, Avg generation throughput: 26.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.3%
228
+ INFO 01-04 14:3INFO 01-04 14:32:57 [loggers.py:111] Engine 000: Avg prompt throughput: 843.1 tokens/s, Avg generation throughput: 18.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 13.7%
229
+ INFO 01-04 14:33:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 13.7%
230
+ INFO 01-04 14:33:17 [loggers.py:111] Engine 000: Avg prompt throughput: 1062.2 tokens/s, Avg generation throughput: 87.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.1%, Prefix cache hit rate: 13.5%
231
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
232
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
233
+ INFO 01-04 14:33:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 55.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cacINFO 01-04 14:33:27INFO 01-04 14:33:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:34:03 [loggers.py:111] Engine 000: Avg prompt throughput: 770.6 tokens/s, Avg generation throughput:INFO 01-04 14:34:07INFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 497.2 tokens/s, Avg generation throughput: 36.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 13.4%
234
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
235
+ INFO 01-04 14:34:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 5.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.4%
236
+ INFO 01-04 14:35:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.4%
237
+ INFO 01-04 14:36:17 [loggers.py:111] Engine 000: Avg prompt throughput: 927.3 tokens/s, Avg generation throughput: 9.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 14.4%
238
+ INFO 01-04 14:36:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 14.4%
239
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
240
+ INFO 01-04 14:36:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
241
+ INFO 01-04 14:36:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
242
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
243
+ INFO 01-04 14:37:37 [loggers.py:111] Engine 000: Avg prompt throughput: 1222.8 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
244
+ INFO 01-04 14:37:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
245
+ INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 1079.6 tokens/s, Avg generation throughput: 1.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 13.9%
246
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
247
+ INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.9%
248
+ INFO 01-04 14:39:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.9%
249
+ INFO 01-04 14:41:07 [loggers.py:111] Engine 000: Avg prompt throughput: 1279.1 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 14.6%
250
+ INFO 01-04 14:41:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.4%, Prefix cache hit rate: 14.6%
251
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
252
+ INFO 01-04 14:41:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.6%
253
+ INFO 01-04 14:41:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.6%
254
+ INFO 01-04 14:42:27 [loggers.py:111] Engine 000: Avg prompt throughput: 524.2 tokens/s, Avg generation throughput: 32.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 14.5%
255
+ INFO 01-04 14:42:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 14.5%
256
+ INFO 01-04 14:42:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 14.5%
257
+ INFO 01-04 14:42:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.5%
258
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
259
+ INFO 01-04 14:43:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
260
+ INFO 01-04 14:43:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
261
+ INFO 01-04 14:45:47 [loggers.py:111] Engine 000: Avg prompt throughput: 456.2 tokens/s, Avg generation throughput: 40.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 14.4%
262
+ INFO 01-04 14:45:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 14.4%
263
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
264
+ INFO 01-04 14:46:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
265
+ INFO 01-04 14:46:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.4%
266
+ INFO 01-04 14:51:47 [loggers.py:111] Engine 000: Avg prompt throughput: 519.4 tokens/s, Avg generation throughput: 19.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 15.0%
267
+ INFO 01-04 14:51:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 15.0%
268
+ INFO 01-04 14:52:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 15.0%
269
+ INFO 01-04 14:52:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 15.0%
270
+ INFO 01-04 14:52:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.0%
271
+ INFO 01-04 14:52:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 15.0%
272
+ INFO 01-04 14:52:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 15.0%
273
+ INFO 01-04 14:52:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 15.0%
274
+ INFO 01-04 14:53:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix INFO 01-04 14:58:43 [loggers.py:111] Engine 000: Avg prompt throughput: 459.2 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 11.6%
275
+ INFO 01-04 14:58:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 11.6%
276
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
277
+ INFO 01-04 14:59:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 34.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
278
+ INFO 01-04 14:59:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 11.6%
279
+ 00: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 15.0%
280
+ INFO 01-04 14:54:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.7%, Prefix cache hit rate: 15.0%
281
+ INFO: INFO 01-04 15:08:45 [loggers.py:111] Engine 000: Avg prompt throughput: 536.7 tokens/s, Avg generation throughput: 20.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.8%
282
+ INFO 01-04 15:08:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 18.8%
283
+ INFO 01-04 15:09:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, WaitiINFO 01-04 15:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 547.7 tokens/s, Avg generation throughput: 10.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 16.9%
284
+ INFO 01-04 15:09:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.9%
285
+ INFO 01-04 15:10:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.9%
286
+ INFO 01-04 15:10:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.9%
287
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
288
+ INFO 01-04 15:10:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.9%
289
+ INFO 01-04 15:10:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.9%
290
+ tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.2%
291
+ cache hit rate: 20.2%
292
+ INFO 01-04 14:45:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 20.2%
293
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
294
+ INFO 01-04 14:46:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.2%
295
+ INFO 01-04 14:46:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.2%
296
+ INFO 01-04 14:47:26 [loggers.py:111] Engine 000: Avg prompt throughput: 826.0 tokens/s, Avg generation throughput: 26.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 20.0%
297
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
298
+ INFO 01-04 14:47:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.0%
299
+ INFO 01-04 14:47:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.0%
300
+ INFO 01-04 14:57:46 [loggers.py:111] Engine 000: Avg prompt throughput: 561.0 tokens/s, Avg generation throughput: 26.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 20.3%
301
+ INFO 01-04 14:57:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 20.3%
302
+ INFO 01-04 14:58:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 20.3%
303
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
304
+ INFO 01-04 14:58:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.3%
305
+ INFO 01-04 14:58:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.3%
hf_ip/vllm_gpu5.log ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:39 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:42 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:42 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8006, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:50 [config.py:717] This model supports multiple tasks: {'reward', 'score', 'embed', 'generate', 'classify'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:50 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:13:55 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:13:58 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:13:58 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f9660db2860>
13
+ INFO 01-04 13:13:59 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:13:59 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:13:59 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:13:59 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 100.81 seconds
35
+ INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 101.065262 secondININFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
36
+ INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.50 s
37
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later usININFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.44 ININFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.94 s in total
38
+ INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
39
+ INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94ININFO 01-04 13:18:33 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
40
+ INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.95 seconds
41
+ INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
42
+ WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
43
+ INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
44
+ INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
45
+ INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8006
46
+ INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
47
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
48
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: HEAD, GET
49
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
50
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
51
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
52
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
53
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: POST, GET
54
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
55
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
56
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
57
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
58
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
59
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
60
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
61
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
62
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
63
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
64
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
65
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
66
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
67
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
68
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
69
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
70
+ INFO: Started server process [1317730]
71
+ INFO: Waiting for application startup.
72
+ INFO: Application startup compIININFO 01-04 14:00:01 [loggers.py:111] Engine 000: Avg prompt throughput: 55.5 tokens/s, Avg generation throughput: 37.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.IIIINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
73
+ INFO 01-04 14:00:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 0 reqs, WaitINFO 01-04 14:00:15 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:00:21 [loggers.py:111] Engine 000: Avg prompt throughput: 51.9 tokens/s, Avg generation throughput: 5.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.2%, Prefix cache hit rate: 0.0%IINFO 01-04 14:00:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 0.0%
74
+ IINFOINFO 01-04 14:00:36 [loggers.py:111] Engine 000: Avg INFO 01-04 14:00:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs,ININFO 01-04 14:00:46 [loggers.py:111] Engine 000: Avg INFO 01-04 14:00:51 [loggers.py:111] Engine 000: Avg prompt throughput: 137.8 tokens/s, Avg generation throughput: 82.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GINFO 01-04 14:00:56 [loggers.py:111] Engine 000: Avg INFO 01-04 14:01:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 141.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 0.0%
75
+ INFO: INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1"INFO: ININFO 01-04 14:01:06 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:01:11 [loggers.py:111] Engine 000: Avg prompt throughput: 62.2 tokens/s, Avg generation throughput: 71.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/INFO 01-04 14:01:21 [loggers.py:111] Engine 000: Avg prompt throughput: 83.6 tokens/s, Avg generation throughput: 110.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 0.0%
76
+ INFO: INFO 01-04 14:01:26 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:01:31 [loggers.py:111] Engine 000: Avg prompt throughput: 66.5 tokens/s, Avg generation throughput: 133.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 0.4%
77
+ INFO:INFOINFO: 10.43.30.5:0 - "POST /v1/completions HTTPINFOINFO 01-04 14:01:41 [loggers.py:111] Engine 000: Avg prompt throughput: 102.8 tokens/s, Avg generation throughput: 108.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate:INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
78
+ INFO 01-04 14:01:51 [loggers.py:111] Engine 000: Avg prompt throughput: 91.3 tokens/s, Avg generation throughput: 149.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate:INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt througINFOINFO 01-04 14:02:01 [loggers.py:111] Engine 000: Avg prompt throughput: 98.0 tokens/s, Avg generation throughput: 106.6 tokens/s, Running: 3 reqs, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
79
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
80
+ INFO 01INFO 01-04 14:02:11 [loggers.py:111] Engine 000: Avg prompt throughput: 93.4 tokens/s, Avg generation throughput: 102.9 tokens/s, Running: 3 reqs, Waiting: 0 INFO 01-04 14:02:16 [loggers.py:111] Engine 000: Avg INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throughpuININFO 01-04 14:02:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geneINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1INFO: INFINFO 01-04 14:02:26 [loggers.py:111] Engine 000: Avg ININFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:02:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 73.1 tokens/s, Running: 1 reqs, INFO: INFO 01-04 14:02:36 [loggers.py:111] Engine 000: Avg prompINFINFO 01-04 14:02:41 [loggers.py:111] Engine 000: Avg prompt throughput: 132.5 tokens/s, Avg generation throughput: 83.6 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:02:45 [loggers.py:111] Engine 000: Avg prompt tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
81
+ INFO 01-04 14:02:51 [loggers.py:111] Engine 000: Avg prompt throughput: 116.2 tokens/s, Avg generation throughput: 102.9 tokens/s, Running: 2 reqs, WaitingINFO 01-04 14:02:56 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
82
+ INFO 01-04 14:03:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 79.8 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:03:06 [loggers.py:111] Engine 000: Avg prompINFO 0INFO 01-04 14:03:11 [loggers.py:111] Engine 000: Avg prompt throughput: 83.3 tokens/s, Avg generation throughput: 81.7 tokens/s, Running: 2 reqs, Waiting:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200INFOINFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg pINFO 0INFO 01-04 14:03:21 [loggers.py:111] Engine 000: Avg prompt throughput: 176.4 tokens/s, Avg generation throughput: 70.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 7.3%
83
+ INFO: IINFO 01-04 14:03:26 [loggers.py:111] Engine 000: AvINFO 0INFO 01-04 14:03:31 [loggers.py:111] Engine 000: Avg prompt throughput: 79.2 tokens/s, Avg generation throughput: 71.9 tokens/s, Running: 2 reqs, Waiting: 0 reINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 2INFO: INFINFO 01-04 14:03:36 [loggers.py:111] Engine 000: Avg INFOINFO 01-04 14:03:41 [loggers.py:111] Engine 000: Avg prompt throughput: 118.3 tokens/s, Avg generation throughput: 90.1 tokens/s, Running: 2 reqs, Waiting: 0 rIINFO 01-04 14:03:46 [loggers.py:111] Engine 000: Avg prompt throughput: 110.7 tokens/s, Avg generation throughput: 56.INFOINFO 01-04 14:03:51 [loggers.py:111] Engine 000: Avg prompt throughput: 144.2 tokens/s, Avg geneINFO 01-04 14:03:56 [loggers.py:111] Engine 000: Avg prompt throughput: 143.6 tokens/s, Avg generation throughput: 114.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: INFINFO:INFO: 10.46.17.192:0 -INFO: 10.46.50.192:0 - "POST /vINFO 01-04 14:04:01 [loggers.py:111] Engine 000: Avg prompt throughput: 78.9 tokens/s, Avg generINFO 01-04 14:04:05 [loggers.py:111] Engine 000: Avg prompt throughput: 107.3 tokens/s, Avg generation throughput: 127.9 tINFO 01-04 14:04:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:04:16 [loggers.py:111] Engine 000: Avg prompt throughput: 117.2 tokens/s, Avg generation throughput: 107.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: ININFO 01-04 14:04:21 [loggers.py:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
84
+ INFO 01-04 14:04:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 140.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usaINFO INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO 01-04 14:04:31 [loggers.py:111INFO 01-04 14:04:36 [loggers.py:111] Engine 000: Avg prompt throughput: 145.7 tokens/s, Avg generation throughput: 100.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usINFOINFO 01-04 14:04:41 [loggers.py:111INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
86
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
87
+ INFO 01-04 14:04:46 [loggers.py:111] Engine 000: Avg prompt throughput: 188.8 tokens/s, Avg generation throughINFOINFO 01-04 14:04:51 [loggers.py:111] Engine 000: Avg prompt throughput: 125.8 tokens/s, Avg generation thINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 0INFO 01-04 14:04:56 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
88
+ INFO 01-04 14:05:01 [loggers.py:111] Engine 000: Avg prompt throughput: 93.6 tokens/s, Avg generation thrINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
89
+ INFO 01-04 14:05:06 [loggers.py:111] Engine 00INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
90
+ INFO 01-04 14:05:11 [loggers.py:111] Engine 000: Avg prompt throughput: 202.6 tokens/s, Avg generation thrINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
91
+ INFO 01-04 14:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 205.8 tokens/s, Avg generation throughpuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
92
+ INFO 01-04 14:05:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatiINFO 01-INFO 01-04 14:05:26 [loggers.py:111] Engine 000: Avg prompt throughput: 181.3 tokens/s, Avg generation throughINFO 01-04 14:05:31 [loggers.py:111] Engine 000: Avg prompt throughput: 248.7 tokens/s, Avg generaINFO: INFO: 10.45.190.192:0 - "POST /v1/INFO: 10.43.30.3:0 - "POSINFO 01-04 14:05:36 [loggers.py:111] EngineINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
93
+ INFO 01-04 14:05:41 [loggers.py:111] Engine 000: Avg prompt throughput: 128.7 tokens/s, Avg generation INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO 01-04INFO 01-04 14:05:46 [loggers.py:111] EngiINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
95
+ INFO 01-04 14:05:51 [loggers.py:111] Engine 000: Avg prompt throughput: 161.7 tokens/s, Avg generatioINFO 01-04INFO 01-04 14:05:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
96
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
97
+ INFO 01-04 14:06:01 [loggers.py:1INFO: INFO 01-04 14:06:06 [loggers.py:111] Engine 000: Avg prompt throughput: 294.6 tokens/s, Avg generation throughput: 75.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.46.50.192:0 - "POST /v1/compleINFO: 10.46.50.192:INFO 01-04 14:06:11 [loggers.pyINFO 01-04 1INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
98
+ INFO 01-04 14:06:16 [loggers.py:111] Engine 000: Avg prompt throughput: 207.7 tokens/s, Avg generation througINFO 01-04 14:06:21 [loggers.py:111] Engine 000: Avg prompt throughput: 375.8 tokens/s, Avg generINFO 01-04 14:06:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:06:26 [loggers.py:111] Engine 000: Avg prompt throughput: 216.5 tokens/s, Avg generation throughpINFO 01-04 14:06:31 [loggers.pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
99
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
100
+ INFO 01-04 14:INFO 01-04 14:06:36 [loggers.py:111] Engine 000: AvINFO 01-04 14:06:36 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:06:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughINFO 01-04 14:06:46 [loggers.py:111] Engine 000: Avg prompt throughput: 233.9 tokens/s, Avg generation throughput: 67.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:06:51 [loggers.py:111] Engine 000: Avg prompt throughput: 312.3 tokens/s, Avg generation througINFO 01-04 14:06:56 [loggers.py:111] Engine 000: Avg prompt throughput: 241.7 tokens/s, Avg generation throughINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
101
+ INFO 01-04 14:07:01 [loggers.py:111] Engine 000: Avg prompt throughput: 210.1 tokens/s, Avg generation throuINFO 01-04 14:07:06 [loggers.py:111] Engine 000: Avg prompt throughput: 208.1 tokens/s, Avg generation throughpINFO 01-04 14:07:06 [loggers.py:111] Engine 000: Avg prompt tINFINFO: 10.46.17.192:0 - "POST /v1/compleINFO: 10.46.17.192:INFO 01-04 14:07:11 [loggers.py:INFO: 1INFO 01-04 14:07:16 [loggers.py:111] Engine 000: Avg prompt throughput: 204.3 tokens/s, Avg generation throughpuINFO 01-04INFO 01-04 14:07:18 [loggers.py:111] Engine 000: Avg INFO 01-04 14:07:21 [loggers.py:111] EngineINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
102
+ INFO 01-04 14:07:26 [loggers.py:111] Engine 000INFO 01-04 14:07:26 [loggers.py:111] Engine 000: Avg prompt throughput: 177.INFO: 10.46.17.192:0 - "POST /v1/completions INFO 01-04 14:07:31 [loggers.py:111] Engine INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
103
+ INFO 01-04 14:07:36 [loggers.py:111] Engine 000: Avg prompt throughput: 522.4 tokens/s, Avg generation thrINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
104
+ INFO 01-04 14:07:41 [loggers.py:111] Engine 000: Avg prompt throughput: 328.3 tokens/s, AvgINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO 01-04 14:07:43 [INFO 01-04 14:07:46 [loggers.py:111] Engine 000INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1"ININFO 01-04 14:07:51 [loggers.py:111] Engine 000: Avg prompt throughput: 208.1 tokens/s, Avg generation throughpINFO: 10.45.190.192:0 - "POST /v1/completINFO 01-04 14:07:53 [lINFO 01-04 14:07:56 [loggers.py:111] EngiINFO 01-04 14:08:01 [loggers.py:111] Engine 000: Avg prompt throughput: 506.5 tokens/s, Avg generation throughput: 74.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 14.5%
106
+ INFO: 10.46.50.192:0INFO 01-04 14:08:06 [loggers.py:111] Engine 000: Avg prompt throughput: 303.7 tokens/s, Avg generation throuINFO 01-04 14:08:11 [loggers.py:111] Engine 000: Avg prompt throughput: 299.0 tokens/s, INFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 307.5 tokens/s, INFO 01-04 14:08:16 [loggers.py:111] Engine 000: Avg prompt throughput: 224.7 tokens/s, Avg generationINFO 01-INFO: 10.45INFO INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
107
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
108
+ INFO 01-04 14:08:23 [logINFO 01-04 14:08:26 [loggers.py:111] Engine 000: Avg prompt throughput: 211.0 tokens/s, Avg generatINFO 01-04 14:08:27 [loggerINFO 01-04 14:08:28 [loggers.py:111] Engine 000: INFO 01-04 14:08:31INFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 257.0 tokens/s, AINFO 01-04 14:08:36 [loggers.py:111] Engine 000: Avg prompt throughput: 331.1 tokens/s, Avg generatioINFO 01-INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
109
+ INFO 01-04 14:08:41 INFO 01-04 14:08:43 [loggeINFO 01-04 14:08:46 [loggers.py:111] Engine 000: Avg prompt throughput: 416.6 tokens/s, Avg generation throughput: 60.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:08:51 [loggers.py:111] Engine 000: Avg prompt throughput: 571.5 tokens/s, Avg generation throughpuINFO 01-04 14:08:56 [loggers.py:111] Engine 000: Avg prompt throughput: 386.5 tokens/s, Avg generation throughput: 73.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:09:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:09:03 [loggersINFO: 10.46.50.192:0 - "POST /v1/completionsINFO 01-04 14:09:0INFO 01-04 14:09:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:09:11 [loggers.py:111] Engine 000: Avg prompt throughput: 436.0 tokensINFO 01-04 14:09:13 [loggersINFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg prompt throughput: 177.9 tokens/s, Avg generation throughput:INFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:09:21 [loggers.py:111] Engine 000INFO: 10.45.190.192:0 - "POST /vINFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 339.9 tINFO: 10.43.INFO 01-04 14:09:26 [loggers.py:111] EnINFO 01-04 14:09:31 [loggers.py:111] Engine 000: Avg prompt throughput: 183.5 tokensINFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 300.8 tokens/s, Avg geneINFO 01-04 14:09:36 [loggers.py:111] Engine 000: Avg prompt throughput: 255.0 tokens/s, Avg generation INFO 01-04 14:09:41 [loggers.py:111] Engine 000: Avg prompt throughput: 624.3 tokenINFO 01-04 14:09:43 [loggers.py:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
110
+ INFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 263.6 tokens/s, Avg generation throughput: 5INFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg pINFO: 10.4INFO: 10.43.30.3:0 - "POST /v1/INFO 01-04 14:09:56 [loggers.py:111] Engine 000: Avg prompt throughput: 248.6 tokens/s, Avg generation throughput: 62.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usaINFO: 10.46.17.192:0 - "POST /v1/coINFO 01-04 14:10:06 [loggers.py:111] Engine 000: Avg prompt throughput: 445.9 tokens/s, Avg generation throughput: 105.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 9.8%
111
+ INFO: 10.46.17.192:0 - INFO: 10.46.17.192:0 - "POST /v1/coINFO: 10.46.50.192:0 - INFO 01-04 14:10:13 [loggers.py:111] EnINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
112
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO 01-04 14:10:16 [loggers.py:111] INFO 01-04INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
114
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
115
+ INFO 01-04 14:10:23 [loggers.py:111] EngiINFO 01-04 14:10:26 [loggers.py:111] Engine 000: Avg prompt throughput: 401.7 tokens/s, Avg generatioINFO 01-04 14:10:31 [loggers.py:111] Engine 000: Avg prompt throughput: 393.8 tokens/s, Avg generation throughput: 66.INFO: 10.43.30.3:0 - "INFO 01-04 14:10:33 [loggers.py:111] EINFO 01-04 14:10:36 [loggers.py:111] INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
116
+ INFO 01-04 14:10:41 [loggers.py:111] Engine 000: Avg prompt throughput: 240.5INFO: 10.46.17.192:0 - "POST /v1/coINFO 01-04 14:10:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thrINFO 01-04 14:10:51 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: INFO 01-04 14:10:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KVINFO: 10.46.50.192:0 - "POST /v1/completions INFO 01-04 14:11:06 [loggers.py:111] Engine 000: Avg prompt throughput: 401.6 tokens/s, Avg generation throughput: 79.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:11:06 [loggers.py:111] EINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
117
+ INFO 01-04 14:11:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 79.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:11:21 [loggers.py:111] Engine 000: AINFO: 10.43.30.3:INFO 01-04 14:11:43 [loggers.py:111] Engine 000: Avg prompt throughput: 317.7 tokens/s, Avg generation throughput: 18.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 8.8%
118
+ INFO 01-04 14:11:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 8.8%
119
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
120
+ INFO 01-04 14:12:03 [loggers.py:111] Engine 000: Avg prompt throughput: 324.5 tokens/s, Avg generation throughput: 64.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 8.5%
121
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
122
+ INFO 01-04 14:12:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 8.5%
123
+ INFO 01-04 14:12:23 [loggers.py:111] Engine 000: Avg prompt throughput: 367.4 tokens/s, Avg generation throughput: 29.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 8.2%
124
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
125
+ INFO 01-04 14:12:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 8INFOINFO 01-04 14:12:43 [loggers.py:111] Engine 000: Avg prompt throughput: 590.1 tokens/s, Avg generation throughput: 57.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 7.8%
126
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
127
+ INFO 01-04 14:12:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 7.INFINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
128
+ INFO 01-04 14:13:03 [loggers.py:111] Engine 000: Avg prompt throughput: 340.6 tokens/s, Avg generation throughput: 41.0 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:13:13 [loggers.py:111] Engine 000: Avg prompt throughput: 415.2 tokens/s, Avg generation throughput: 57.6 tokens/s, Running: 2 reqs, WaiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
129
+ INFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughpINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
130
+ INFO 01-04 14:13:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 369.0 tokens/s, Avg generation throughput: 37.7 tokens/s, RINFO 01-04 14:13:33 [loggers.py:111] Engine 000: Avg prompt throughput: 292.2 tokens/s, Avg generation throughput: 13.8 tokens/s, Running: 1 reqs, WaitingINFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
131
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
132
+ INFO 01-04 14:13:43 [loggINFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 520.6 tokens/s, Avg generation throughput: 59.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, PreINFO 01-04 14:13:53 [loggers.py:111] Engine 000: Avg prompt throughput: 255.1 tokens/s, Avg generation throughput: 23.3 tokens/s, Running: 1 reqs, Waiting: 0 INFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
133
+ INFO 01-04 14:14:03 [loggers.py:111] Engine 000: Avg prompt throughput: 410.0 tokens/s, Avg genINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
134
+ INFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:14:13 [loggers.py:111] Engine 000: Avg prompt throughput: 406.3 tokens/s, Avg generation throughput: 72.7 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 382.7 tokens/s, Avg generation throughput: 35.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, INFO 01-04 14:14:23 [loggers.INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 666.4 tokens/s, Avg generation throughput: 94.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%,INFO 01-04 14:14:33 [loggers.py:111] Engine 000: Avg prompt throughput: 565.0 tokens/s, Avg generation throughput: 85.4 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg promINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
135
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
136
+ INFO 01-04 14:14:43 [loggers.py:111] Engine 000: Avg prompt throughput: 713.0 tokens/s, Avg generINFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 506.8 tokens/s, Avg generation throughput: 69.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%,INFO: 10.46.50.192:0 - "POINFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 605.6 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 10.8%
137
+ INFO: 10.46.50.192:0 - "POST /INFO 01-04 14:15:03 [loggers.py:111] Engine 000: Avg prompt throughput: 521.6 tokens/s, Avg geneINFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 390.4 tokens/s, Avg generation throughput: 111.3 toINFO 01-04 14:15:13 [loggers.py:111] Engine 000: Avg prompt throughput: 337.7 tokens/s, Avg generation throughput: 72.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
138
+ INFO 01-04 14:15:23 [loggers.py:111] Engine 000: Avg prompt throughput: 991.8 tokens/s, Avg generation throughput: 109.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 9.7%
139
+ INFO: 1INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
140
+ INFO 01-04 14:15:33 [loggers.py:111] Engine 000: Avg prompt throughput: 209.0 tokens/s, Avg generation throughput: 82.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, INFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
141
+ INFO 01-04 14:15:43 [loggers.py:111] Engine 000: Avg prompt throughput: 343.3 tokens/s, Avg generation throughput: 105.4 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 284.9 tokens/s, Avg generation throughput: 42.2 toINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
142
+ INFO 01-04 14:15:53 [loggers.pyINFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 343.0 tokens/s, Avg generation throughput: 58.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9INFO 01-04 14:16:03 [loggers.py:INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 287.0 tokens/s, Avg generation throughput: 100.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4INFO 01-04 14:16:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 676.6 tokens/s, Avg generation throughput: INFO 01-04 14:16:23 [loggers.py:111] Engine 000: Avg prompt throughput: 501.5 tokens/s, Avg generation throughput: 105.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
143
+ INFO 01-04 14:16:27 [loggers.py:111] Engine 000: AvINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
144
+ INFO 01-04 14:16:33 [loggers.py:111] Engine 000: Avg prompt throughput: 657.6 tokens/s, Avg generation INFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 87.INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
145
+ INFO: 10.45.190.192:0 - "POST /INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
146
+ INFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 489.5 tokens/s, Avg generation throughput: 119.6 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO 01-04 14:16:46 [INFO 01-04 14:16:53 [loggers.py:111]INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 863.9 tokens/s, Avg generation throughput: 145.6 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usagINFO: 10.43.30.5:0 - "POST /v1/comINFO: 10.46.17.192:0 -INFO 01-04 14:17:03 [loggers.py:111] Engine 000: Avg prompt throughput: 542.5 tokens/s, Avg generation throughput: 74.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV caINFO 01-04 14:17:07 [loggers.py:111] Engine 00INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
147
+ INFO 01-04 14:17:13 [loggers.py:111] Engine 000: Avg prompt throughput: 515.9 tokens/s, Avg generation throughput: 73.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cacINFO: 10.43.30.4:0 - "INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 517.0 tokenINFO 01-04 14:17:23 [loggers.py:111] Engine 000: Avg prompt throughput: 624.5 tokens/s, Avg generation throughput: 56.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:17:27 [loggers.py:111] Engine 000INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
148
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
149
+ INFO 01-04 14:17:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
150
+ INFO 01-04 14:17:37 [loggers.py:111] Engine INFO 01-04 14:17:43 [loggers.py:111] Engine 000: Avg prompt throughput: 452.7 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cachINFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 530.9 tokens/s, Avg generation throuINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
151
+ INFO 01-04 14:17:53 [loggers.py:111] Engine 000: Avg prompt throughput: 441.2 tokens/s, Avg generation throughputINFO: INFO: 10.46.50INFO 01-04 14:17:56 [loggers.py:111] EngINFO 01-04 14:17:57 [loggers.py:111] INFINFO 01-04 14:18:03 [loggers.py:111] Engine 000: Avg prompt throughput: 517.5 tokens/s, Avg generation throughput: 24.8 tokens/s, Running: INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
152
+ INFO 01-04 14:INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO 01-04 14:18:13 [loggers.py:111] Engine 000: Avg prompt throughput: 617.8 tokens/s, Avg generation throughput: 65.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:18:17 [loggers.py:111] EINFO 01-04 14:18:23 [loggers.py:111] Engine 000: Avg prompt throughput: 426.4 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 12.1%
154
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
155
+ INFO 01-04 14:18:33 [loggers.py:111] Engine 000: Avg prompt throughput: 429.2 tokens/s, Avg generation throughput: 74.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6INFO: 10.46.50.192:0 - "POSTINFO 01-04 14:18:43 [loggers.py:111] Engine 000: Avg prompt throughput: 668.4 tokens/s, Avg generation throughput: 93.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.6%, Prefix cache hit rate: 12.5%
156
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
157
+ INFO 01-04 14:18:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 120.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 12.5%
158
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
159
+ INFO 01-04 14:19:03 [loggers.py:111] Engine 000: Avg prompt throughput: 1429.8 tokens/s, Avg generation throughput: 101.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.8%, Prefix cache hit rate: 13.0%
160
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
161
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
162
+ INFO 01-04 14:19:13 [loggers.py:111] Engine 000: Avg prompt throughput: 580.7 tokens/s, Avg generation throughput: 115.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.0%, Prefix cache hit rate: 12.7%
163
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
164
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
165
+ INFO 01-04 14:19:23 [loggers.py:111] Engine 000: Avg prompt throughput: 664.3 tokens/s, Avg generation throughput: 77.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2INFO 01-04 14:19:25 [loggers.py:11INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
166
+ INFO 01-04 14:19:33 [loggers.py:111] Engine 000: Avg prompt throughput: 837.0 tokens/s, Avg generation throughput: 50.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO: 10.43.30.3:0 - "POST /v1/complINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
167
+ INFO 01-04 14:19:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.4 tINFO 01-04 14:19:45 INFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1119.2INFO 01-04 14:19:53 [loggers.py:111] Engine 000: Avg prompt throughput: 595.3 tokens/s, Avg generation throughput: 18.2 tINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
168
+ INFO 01-04 14:19:55INFO 01-04 14INFO 01-04 14:20:03 [loggers.py:111] Engine 000: Avg prompt throughput: 760.4 tokens/s, Avg generation throughput: 63.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6INFO 01-04 14:20:05 [loggers.py:INFO: 10.43.30.5:0 - "PINFO 01-04 14:20:07 [loggINFO 01-04 1INFO 01-04 14:20:13 [loggers.py:111] Engine 000: Avg prompt throughput: 804.7 tokens/s, Avg generation throughput: 65.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.4%INFO 01-04 14:20:15 [loggers.py:111] Engine 000: Avg prompt throughput: 334.0 tokens/s, Avg generINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
169
+ INFO 01-04 14:20:23 [loggers.py:111] Engine 000: Avg prompt throughput: 204.6 tokens/s, Avg generation throughput: 60.3 tokenINFO 01-04 14:20:25 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:20:27 [loggers.py:11INFO 01-04 14:20:33 [loggers.py:111] Engine 000: Avg prompt throughput: 437.5 tokens/s, Avg generation throughput: 65.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, PrINFO 01-04 14:20:35 [loggerINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
170
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ INFO 01-04 14:20:43 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:20:45 [loggers.py:111] Engine 000: Avg prompt throughput: 622.2 tokens/s, Avg generation throughput: 62.4 tokens/s, Running: 2 reqs, Waiting:INFO 01-04 14:20:53 [loggers.py:111] Engine 000: Avg prompt throughput: 559.0 tokens/s, Avg generation throughput: 55.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, PrefINFO 01-04 14:20:55 [loggINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
172
+ INFO 01-04 14:21:03 [loggers.py:111] Engine 000: Avg prompt throughput: 632.5 tokens/s, Avg generation throughput: 64.5 tokens/sINFO 01-04 14:21:05 [loggers.py:111] Engine 000: Avg prompt throughput: 512.3 tokens/s, AvgINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
173
+ INFO 01-04 14:21:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 73.2 tokens/s, RINFO 01-04 14:21:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO: 10.43.30.5:0 - "POST /v1/cINFO 01-04 14:21:17 [loggersINFO 01-04 14:21:23 [loggers.py:111] Engine 000: Avg prompt throuINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
174
+ INFO 01-04 14:21:25 [loggers.py:111] Engine 000: Avg prompt INFO: 10.43.30.5:0 - "POINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
175
+ INFO 01-04 14:21:33 [loggers.py:111] Engine 000: Avg prompt throughput: 423.7 tokens/s, Avg generation throughput: 25.1 tokens/s, INFO 01-04 14:21:35 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:21:37 [loggers.pyINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
176
+ INFO 01-04 14:21:43 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
177
+ INFO 01-04 14:21:45 [loggers.py:111] Engine 000: Avg prompt throughput: 566.0 tokensINFO 0INFO 01-04 14:21:53 [loggers.py:111] Engine 000: Avg prompt throughput: 600.6 tokens/s, Avg generation throughput: 38.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, PreINFO 01-04 14:21:55 [loggeINFO: 10.46.50.192:0 - "POST /v1INFO: 10.46.17.192:0 - "POINFO 01-04 14:22:03 [loggers.py:111] Engine 000: Avg prompt throughput: 313.1 tokens/s, Avg generation throughput: 34.4 tokens/INFO 01-04 14:22:05 [loggers.py:111] Engine 000: Avg prompt throughput: 671.4 tokens/s, Avg INFO 01-04 14:22:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, PrefiINFO 01-04 14:22:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO: INFO 01-04 14:22:23 [loggers.py:111] Engine 000: Avg prompt throughput: 679.5 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, PrefINFO 01-04 14:22:25 [loggers.py:111] Engine 000: Avg prompt throughput: 902.9 tokens/s, Avg INFO 01-04 14:22:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.4 tokens/s, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
178
+ INFO 01-04 14:22:35 [lINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
179
+ INFO 01-04 14:22:43 [loggers.py:111] Engine 000: Avg prompt throughput: 773.5 tokens/s, Avg generation throughput: 21.2 tokens/s, RINFO 01-04 14:22:45 [loggers.py:111] Engine 000: Avg prompt throughput: 664.9 tokens/s, INFO 01-04 14:22:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunnINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
180
+ INFO 01-04 14:22:55 [INFO 01-04 14:23:03 [loggers.py:111] Engine 000: Avg prompt throughput: 794.3 tokens/s, Avg generation throughput: 42.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cINFO 01-04 14:23:05 [INFO 01-04 14:23:13 [loggers.py:111] Engine 000: Avg prompt throughput: 561.7 tokens/s, Avg generation throughput: 71.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, PrefixINFO 01-04 14:23:15 [loINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
181
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
182
+ INFO 01-04 14:23:23 [loggers.py:111] Engine 000: Avg prompt throughput: 767.6 tokens/s, Avg generation throughput: 89.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cachINFO 01-04 14:23:2INFO 01-04 14:23:33 [loggers.py:111] Engine 000: Avg prompt throughput: 509.8 tokens/s, Avg generation throughput: 76.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cacINFO 01-04 14:23:35 [loggers.py:111] Engine 000: Avg prompt throughput: 702.6 tokens/INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO 01-04 14:23:43 [loggers.py:111] Engine 000: Avg prompt throughput: 505.9 tokens/s, Avg generation throughput: 48.9 tokens/s, RunnINFO 01-04 14:23:45 [loggers.py:111] Engine 000: Avg prompt throughput: 765.8 tokens/INFO 01-04 14:23:53 [loggers.py:111] Engine 000: Avg prompt throughput: 540.5 tokens/s, Avg generation throughput: 48.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cacheINFO 01-04 14:23:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
184
+ INFO 01-04 14:24:03 [loggers.py:111] Engine 000: Avg prompt throughput: 810.6 tokens/s, Avg generation throughput: 75.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hINFO 01-04 14:2INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
185
+ INFO 01-04 14:24:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hINFO: 10.46INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
186
+ INFO 01-04 14:24:23 [loggers.py:111] Engine 000: Avg prompt throughput: 870.9 tokens/s, Avg generation throughput: 46.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 16.5%
187
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
188
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
189
+ INFO 01-04 14:24:33 [loggers.py:111] Engine 000: Avg prompt throughput: 772.6 tokens/s, Avg generation throughput: 58.9 tokens/s, Running: 0 reINFO 01-04 14:24:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:24:43 [loggers.py:111] Engine 000: Avg prompt throughput: 511.3 tokens/s, Avg generation throughput: 27.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit INFO 01-04 1INFO 01-04 14:24:53 [loggers.py:111] Engine 000: Avg prompt throughput: 758.4 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.3%, Prefix cache hit INFO 01-04 1INFO 01-04 14:25:03 [loggers.py:111] Engine 000: Avg prompt throughput: 539.0 tokens/s, Avg generation throughput: 91.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.8%, Prefix cache hiINFO 01-04 14:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
190
+ INFO 01-04 14:25:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 108.4 tokens/s, Running: 2INFO 01-04 14:25:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
191
+ INFO 01-04 14:25:23 [loggers.py:111] Engine 000: Avg prompt throughput: 826.9 tokens/s, Avg generation throughput: 113.8 tokens/s, RunningINFO 01-04 14:25:25 [loggers.py:111] Engine 000: Avg prompt throughput: 777.0 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
192
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
193
+ INFO 01INFO 01-04 14:25:35 [loggers.py:111] Engine 000: Avg prompt throughput: 747.1 tokens/s, Avg generation throughput: 102.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.2%, Prefix cache hiINFO: INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
194
+ INFO 01-04 14:25:43 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:25:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 111.7 tokens/s, Running: 1 reINFINFO 01-04 14:25:53 [loggers.py:111] Engine 000: Avg prompt throughput: 6INFO 01-04 14:25:55 [loggers.py:111] Engine 000: Avg prompt throughput: 545.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 2 reqsINFO 01-04 14:26:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reINFO 01-04 14:26:05 [loggers.py:111] Engine 000: Avg prompt throughput: 377.INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
195
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
196
+ INFO 01-04 14:26:13 [loggers.py:111] Engine 000: Avg prompt throughput: 48INFO 01-04 14:26:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 50.2 tokens/s, Running: 1 rINFO INFO 01-04 14:26:23 [loggers.py:111] Engine 000: Avg prompt throughput: 251.7 tokens/s, Avg generation throughput: 41.4 tokens/s, Running:INFO 01-04 14:26:25 [loggers.py:111] Engine 000: Avg prompt throughput: 671.8 tokINFO 01-04 14:26:33 [loggers.py:111] Engine 000: Avg prompt throughput: 505.7 tokens/s, Avg generation throughput: 55.5 tokens/s, Running:INFO 01-04 14:26:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:26:43 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:26:45 [loggers.py:111] Engine 000: Avg prompt throughput: 606.1 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 2INFO 01INFO 01-04 14:26:53 [loggers.py:111] Engine 000: Avg prompt throughput: 950.0 tokens/s, Avg generation throughput: 60.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache INFO 01-04 14:26INFO 01-04 14:27:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.8%, Prefix cache INFO 01-04 14:27:05 [loggers.py:111] Engine 000: Avg prompt throughput: 5INFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:27:13 [loggers.py:111] Engine 000: Avg prompt throughput: 649.2 tokens/s, Avg generation throughput: 44.7 tokens/s, RunningINFO 01-04 14:27:15 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 INFO 01-04 14:27:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache INFO 01-04 14:27INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1INFO 01-0INFO 01-04 14:27:33 [loggers.py:111] Engine 000: Avg prompt throughput: 896.8 tokens/s, Avg generation throughput: 51.0 tokens/s, RunningINFO 01-04 14:27:35 [loggers.py:111] Engine 000: Avg prompt throughput: 3INFO 01-0INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
197
+ INFO 01-04 14:27:43 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:27:45 [loggers.py:111] Engine 000: Avg prompt throughput: 842.8 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: INFO 01-INFO 01-04 14:27:53 [loggers.py:111] Engine 000: Avg prompt throughput: 596.3 tokens/s, Avg generation throughput: 21.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hiINFO 01-04 14:INFO 01-04 14:28:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hiINFO 01-04 14:INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
198
+ INFO 01-04 14:28:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit ratININFO 01INFO 01-04 14:28:23 [loggers.py:111] Engine 000: Avg prompt throughput: 453.7 tokens/s, Avg generation throughput: 34.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit ratININFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
199
+ INFO 01INFO 01-04 14:28:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 2.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 1INFO INFO 01-04 14:28:43 [loggers.py:111] Engine 000: Avg prompt throughput: 566.7 tokens/s, Avg generation throughput: 13.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 17.6%
200
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
201
+ INFO 01-04 14:28:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 21.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.6%
202
+ INFO 01-04 14:29:03 [loggers.py:111] Engine 000: Avg prompt throughput: 659.1 tokens/s, Avg generation throughput: 25.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 18.1%
203
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 0INFO 01-04 14:29:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 14.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 1INFO 01-04 14:29:25 [loggers.py:111] Engine 000: Avg prompt throughput: 686.4 tokens/s, Avg generation throughput: 30.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 22.1%
204
+ INFO 01-04 14:29:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 22.1%
205
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
206
+ INFO 01-04 14:29:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 33.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 22.1%
207
+ IINFO 01-04 14:29:56 [loggers.py:111] Engine 000: Avg prompt throughput: 709.1 tokens/s, Avg generation throughput: 0.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.6%IINFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 16.6%IINFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 16.6%IINFO 01-04 14:30:26 [loggers.py:111] Engine 000: Avg prompt throughput: 645.4 tokens/s, Avg generation throughput: 61.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, Prefix cache hit rate: 16.4%
208
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
209
+ INFO 01-04 14:30:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 85.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.4%
210
+ IINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 24.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
211
+ INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.4%
212
+ INFO 01-04 14:31:06 [loggers.py:111] Engine 000: Avg prompt throughput: 663.8 tokens/s, Avg generation throughput: 13.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 17.3%
213
+ INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:31:23 [loggers.py:111] Engine 000: Avg prompt throughput: 518.0 tokens/s, Avg generation throughput: 39.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rINFO 01-04 INFO 01-04 14:31:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit INFO 01-04 1INFO 01-04 14:31:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hitINFO 01-04 14INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
214
+ INFO 01-04 14:31:53 [loggers.py:111] Engine 000: Avg prompt throughput: 914.5 tokens/s, Avg generation throughput: 50.9 tokens/s, Running: 1 rINFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
215
+ INFO 01-04 14:32:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 34.7 tokens/s, Running: INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 927.5 toINFO 01-04 14:32:13 [loggers.py:111] Engine 000: Avg prompt throughput: 777.9 tokens/s, Avg generation throughput: 24.0 tokens/s, Running: 1 reINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 14:32INFO 01-04 14:32:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cacheINFO 01-04 14:32:INFO 01-04 14:32:33 [loggersINFO 01-04 14:32:37 [loggers.py:111] Engine 000: Avg prompt throughput: 560.4 tokens/s, Avg generation throughput: 0.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:32:56 [loggers.py:111] Engine 000: Avg prompt throughput: 611.1 tokens/s, Avg generation throughput: 2.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.9%
216
+ INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.9%
217
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
218
+ INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.9%
219
+ INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.9%
220
+ INFO 01-04 14:33:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1091.1 tokens/INFO 01-04 14:34:03 [loggers.py:111] Engine 000: Avg prompt throughput: 497.1 tokens/s, Avg generation throughput: 15.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 17.9%
221
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
222
+ INFO 01-04 14:34:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 18.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:34:17 [loggers.py:111] EnINFO 01-04 14:34:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usagINFO 01-04 14:34:27 [loggers.py:111] EINFO 01-04 14:34:33 [loggers.py:111] Engine 000: Avg prompt throughput: 1340.9 tokens/s, Avg generation throughput: 41.7 tokens/s, RINFO 01-04 14:34:46 [loggers.py:111] Engine 000: Avg prompt throughput: 514.1 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 19.0%
223
+ INFO 01-04INFO 01-04 14:35:08 [loggers.py:111] Engine 000: Avg prompt throughput: 854.2 tokens/s, Avg generation throughput: 46.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 17.0%
224
+ INFO 01-04 14:35:18 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 17.0%
225
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200INFO 01-04 14:35:35 [loggers.py:111] Engine 000: Avg prompt throughput: 958.6INFO 01-04 14:36:03 [loggers.py:111] Engine 000: Avg prompt throughput: 627.6 tokens/s, Avg generation throughput: 27.8 tokens/s, RunINFO 01-04 14:36:16 [loggers.py:111] Engine 000: Avg prompt throughput: 677.4 tokens/s, Avg generation throughput: 28.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 19.8%
226
+ INFO 01-04 14:36:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 19.8%
227
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
228
+ INFO 01-04 14:36:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.8%
229
+ INFO 01-04 14:36:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/sINFO 01-04 14:37:03 [loggers.py:111] Engine 000: Avg prompt throughput: 535.8 tokens/s, Avg generation throughput: 31.4 tokens/s, RuINFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 504.0 tokens/s,INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
230
+ INFO 01-04 14:37:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.1 tokens/s, RunninINFO 01-04 14:37INFO 01-04 14:38:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1079.4 tokens/s, Avg generation throughput: 33.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 18.2%
231
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO 01-04 14:38:33 [loggers.py:111] Engine 000: Avg prompt throughput: 573.9 tokens/s, Avg generation throughput: 21.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 17.8%
232
+ INFO 01-04 14:38:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.8INFO 01-04 14:39:26 [loggers.py:111] Engine 000: Avg prompt throughput: 380.6 tokens/s, Avg generation throughput: 9.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 18.1%
233
+ INFO 01-04 14:39:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 18.1%
234
+ INFO 01-04 14:39:46 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:40:03 [loggers.py:111] Engine 000: Avg prompt throughput: 489.2 tokens/s, Avg generation throughput: 38.8 tokens/s, RunnINFO 01-04 14:40:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1203.8 tokensINFO 01-04 14:40:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunninINFO 01-04 14:40:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:40:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, RunninINFO 01-04 14:40:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO 01-04 14:41:16 [loggers.py:111] Engine 000: Avg prompt throughput: 506.6 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 18.0%
235
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
236
+ INFO 01-04 14:41:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 15.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
237
+ INFO 01-04 14:41:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running:INFO 01-04 14:42:46 [loggers.py:111] Engine 000: Avg prompt throughput: 952.5 tokens/s, Avg generation throughput: 39.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 20.5%
238
+ INFO 01-04 14:42:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.1%, Prefix cache hit rate: 20.5%
239
+ INFO 01-04 14:43:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 20.5%
240
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
241
+ INFO 01-04 14:43:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 40.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.5%
242
+ INFO 01-04 14:43:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO 01-04 14:44:23 [loggers.py:111] Engine 000: Avg prompt throughput: 652.5 tokens/s, Avg generation throughput: 1.4 tokens/s, RunninINFO 01-04 14:46:36 [loggers.py:111] Engine 000: Avg prompt throughput: 571.7 tokens/s, Avg generation throughput: 28.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 20.4%
243
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
244
+ INFO 01-04 14:46:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.4%
245
+ INFO 01-04 14:46:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 20.4%
246
+ INFO 01-04 14:54:36 [loggers.py:111] Engine 000: Avg prompt throughput: 426.4 tokens/s, Avg generation throughput: 37.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 20.2%
247
+ INFO 01-04 14:54:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 14:55:35 [loggers.py:111] Engine 000: Avg prompt throughput: 431.5 tokens/s, Avg generation throughput: 15.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 25.8%
248
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
249
+ INFO 01-04 14:55:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 29.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 25.8%
250
+ INFO 01-04 14:55:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 25.8%
251
+ Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 20.2%
252
+ INFO 01-04 14:55:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 20.2%
253
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
254
+ INFO 01-04 14:55:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 INFO 01-04 15:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 612.3 tokens/s, Avg generation throughput: 38.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.8%
255
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
256
+ INFO 01-04 15:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 37.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
257
+ INFO 01-04 15:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
258
+ ning: 1 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:55:51 [loggers.py:111] Engine 000: Avg prompt throughput: 437.3 tokens/s, Avg generation throughput: 43.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.5%
259
+ INFO 01-04 14:56:01 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.5%
260
+ INFO 01-04 14:56:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.5%
261
+ INFO 01-04 14:56:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.5%
262
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
263
+ INFO 01-04 14:56:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
264
+ INFO 01-04 14:56:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
265
+ 6 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 18.2%
266
+ INFO 01-04 14:55:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput:INFO 01-04 15:07:05 [loggers.py:111] Engine 000: Avg prompt throughput: 518.9 tokens/s, Avg generation throughput: 5.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 19.0%
267
+ INFO 01-04 15:07:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 19.0%
268
+ INFO 01-04 15:07:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 19.0%
269
+ INFO 01-04 15:07:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 19.0%
270
+ INFO 01-04 15:07:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 19.0%
271
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
272
+ INFO 01-04 15:07:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 5.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.0%
273
+ INFO 01-04 15:08:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.0%
hf_ip/vllm_gpu6.log ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:42 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:45 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:45 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8007, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:53 [config.py:717] This model supports multiple tasks: {'classify', 'generate', 'embed', 'reward', 'score'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:53 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:13:58 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:14:01 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:14:01 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7faff3c54e80>
13
+ INFO 01-04 13:14:03 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:14:03 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:14:03 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:14:03 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 97.35 seconds
35
+ INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 97.798136 seconds
36
+ INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
37
+ INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.48 s
38
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
39
+ INFO 01-04 13:16:50 [backends.py:148] Compiling a graph for general shape takes 53.24 s
40
+ INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.72 s in total
41
+ INFO 01-04 13:17:53 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
42
+ INFO 01-04 13:17:53 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
43
+ INFO 01-04 13:18:32 [gpu_model_runner.py:1686] Graph capturing finished in 39 secs, took 1.21 GiB
44
+ INFO 01-04 13:18:32 [core.py:159] init engine (profile, create kv cache, warmup model) took 171.40 seconds
45
+ INFO 01-04 13:18:32 [core_client.py:439] Core engine process 0 ready.
46
+ WARNING 01-04 13:18:32 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
47
+ INFO 01-04 13:18:32 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
48
+ INFO 01-04 13:18:32 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
49
+ INFO 01-04 13:18:32 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8007
50
+ INFO 01-04 13:18:32 [launcher.py:28] Available routes are:
51
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /openapi.json, Methods: HEAD, GET
52
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /docs, Methods: HEAD, GET
53
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: HEAD, GET
54
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /redoc, Methods: HEAD, GET
55
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /health, Methods: GET
56
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /load, Methods: GET
57
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /ping, Methods: POST, GET
58
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /tokenize, Methods: POST
59
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /detokenize, Methods: POST
60
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/models, Methods: GET
61
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /version, Methods: GET
62
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
63
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/completions, Methods: POST
64
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/embeddings, Methods: POST
65
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /pooling, Methods: POST
66
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /score, Methods: POST
67
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/score, Methods: POST
68
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
69
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /rerank, Methods: POST
70
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v1/rerank, Methods: POST
71
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /v2/rerank, Methods: POST
72
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /invocations, Methods: POST
73
+ INFO 01-04 13:18:32 [launcher.py:36] Route: /metrics, Methods: GET
74
+ INFO: Started server process [1318205]
75
+ INFO: Waiting for application startup.
76
+ INFO: Application startup compleIINFINFO 01-04 14:00:02 [loggers.py:111] Engine 000: Avg prompt throughput: 111.2 tokens/s, Avg generation throughput: 85.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 0INFOINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
77
+ INFO 01-04 14:00:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.1 tokens/s, Running: 1 reqs, WIINFO 01-04 14:00:19 [loggers.py:111] Engine 000: Avg prompt througINFINFO 01-04 14:00:22 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, WaINFO 01-04 14:00:29 [loggers.py:111] Engine 000: Avg prompt throughININFO 01-04 14:00:32 [loggers.py:111] Engine 000: Avg prompt throughput: 65.5 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit INFO 01-04 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OININFO 01-04 14:00:42 [loggers.py:111] Engine 000: Avg prompt throughput: 63.3 tokens/s, Avg generation throughput: 100.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hiINFO: INFO 01-04 14:00:47 [loggers.py:111] Engine 000: Avg prompt INFO 01-04INFO 01-04 14:00:52 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.2 tokens/s, Running: 1 reqs, WINFO 01-04 14:00:57 [loggers.py:111] Engine 000: Avg prompt tINFO 01-0INFO 01-04 14:01:02 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:01:02 [loggers.py:111] Engine 000: Avg prompt throughput: 89.3 tokens/s, Avg generation throughput: 34.6 tokens/s, Running: ININFOINFO 01-04 14:01:09 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFINFO 01-04 14:01:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 104.2 tokens/s, Running:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 INFINFO 0INFO INFO 01-04 14:01:22 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hiINFOINFO INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
78
+ INFO 01-04 14:01:32 [loggers.py:111] Engine 000: Avg prompt throughput: 87.2 tokens/s, Avg generation throughput: 92.6 tokens/s, RunningINFO 01-04 14:01:36 [loggers.py:111] Engine 000: Avg prompt throughput: 84.INFO 01-04 14:01:37 [loggers.py:111] Engine 000: Avg prompt throughINFINFO 01-04 14:01:42 [loggers.py:111] Engine 000: Avg prompt throughput: 58.0 tokens/s, Avg generation throughput: 73.8 tokens/s, Running:INFO 01-04 14:01:46 [loggers.py:111] Engine 000: Avg prompt throughput:INFOINFO 01-04 14:01:47 [loggers.py:111] Engine 000: Avg prompt throughpINFINFO 01-04 14:01:52 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
79
+ INFO: INFO 01-04 14:01:57 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:02:02 [loggers.py:111] Engine 000: Avg prompt throughput: 103.4 tokens/s, Avg generation throughput: 73.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hitINFO: INFINFO 01-04 14:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 90.8 tokens/s, Avg generation throughput: 131.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 1.1%
80
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTINFOINFO 01-04INFO 01-04 14:02:17 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 115.3 tokens/s, Avg generation throughput: 90.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit ratINFO 01IINFINFO 01-04 14:02:27 [loggers.py:111] Engine 000: Avg prompt thIINFO 01-04 14:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 90.0 tokens/s, Avg generation throughput: 69.5 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:02:37 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:02:42 [loggers.py:111] Engine 000: Avg prompt throughput: 123.1 tokens/s, Avg generation throughput: 110.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 6.2INFO: 10.46.17.192:0 - "POST /v1/completions HTTPININFO 01-04 INFINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 INFO 01-04 14:02:53 [loggers.py:111] Engine 000: Avg prompt throughput: 116.6 tokens/s, Avg generation throughput: 96.2 tokens/s, Running: INFO 01-04 14:02INFO 01-04 14:02:57 [loggers.py:111] Engine 000: Avg prompt thrINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
81
+ INFO 01-04 14:03:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 79.3 tokens/s, Running: 1 INFO 01-04 14:03:06 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 ININFO 01-04 14:03:12 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:03:13 [loggers.py:111] Engine 000: Avg prompt throughput: 91.INFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:03:16 [loggers.py:111] Engine 000: Avg prompt throughput: 102.ININFO 01-04 14:03:22 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 INFO 01-04 14:03:26 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:03:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 toINFO 01-04 14:03:32 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:03:33 [loggers.py:111] Engine 000: Avg prompt throughput: 144INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
82
+ INFO 01-04 14:03:36 [loggers.py:111] Engine 000: Avg prompt throughput: 144.7 tINFO 01-04 14:03:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.6 tokens/s, Running: 2 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
83
+ INFO INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
84
+ INFO 0INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO 01-04 14:03:53 [loggers.py:111] Engine 000: Avg prompt throughput: 138INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
86
+ INFO 01-04 14:03:56 [loggers.py:111] Engine 000: Avg prompt throughput: 174.INFO:INFO 01-04 14:04:02 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:04:03 [loggers.py:111] Engine 000: Avg prompt throughput: 100.1 tokens/s, Avg generation throughput: 76.0 tokens/s, Running: 1 reqINFO 01-04 14:04:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:04:13 [loggers.py:111] Engine 000: Avg prompt throughput: 131.2 tokens/s, Avg generation throughput: 56.6 tokens/s, Running: 1INFO 01-04 14:04:16 [loggers.py:111] Engine 000: Avg prompt throughput: 109INFO 01-04 14INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1INFO 01-04 14:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 2INFO 01-04 14:04:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.8 tokens/s, Running: 2 reqINFINFO 01-04 14:04:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
87
+ INFO 01-04 14:04:36 [loggers.py:111] Engine 000: Avg prompt throughput: 132.3 tokens/s, Avg generation throughput: 124.6 tokens/s, Running: 2 rININFO 01-04 14:04:43 [loggers.py:111] Engine 000: Avg prompt throughput: 177.2 tokens/s, Avg generation throughput: 48.3 tokens/s, Running:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
88
+ INFO 01-04 1IINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
89
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
90
+ INFO 01-04 14:04:53 [loggers.py:111] Engine 000: Avg prompt throughput: 184.1INFO 01-04 14:04:56 [loggers.py:111] Engine 000: Avg prompt throughput: 145.4 tokens/s, Avg generation throughput: 50.6 tokens/s, Running: 2 INFO 01-04INFO 01-04 14:05:02 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:05:06 [loggers.py:111] Engine 000: Avg prompt throughput: 217.4 tokens/s, Avg generation throughput: 98.8 tokens/s, Running: 3 INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
91
+ INFO 01-04 14:05:13 [loggers.py:111] Engine 000: Avg prompt throughput: 182.3INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
92
+ INFO 01-04 14:05:16 [loggers.py:111] Engine 000: Avg prompt throughput: 174.4 tokens/s, Avg generation throughput: 119.1 tokens/s, Running: 2 rINFO 01-04 14:05:23 [loggers.py:111] Engine 000: Avg prompt throughput: 186.7 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1INFO: 10INFO 01-04 14:05:25 [loggers.py:111] Engine 000: Avg pINFO 01-04 1INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
93
+ INFO 01-04 14:05:33 [loggers.py:111] Engine 000: Avg prompt throughput: 207.8 tokens/s, Avg generation throughput: 66.2 tokens/s, Running: INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO 01-04 1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
95
+ INFO 01-04 14:05:43 [loggers.py:111] Engine 000: Avg prompt throughput: 216.8 tokens/s, Avg generation throughput: 61.3 tokens/s, Running: 1 rINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
96
+ INFO 01-04INFO 01-04 14:05:53 [loggers.py:111] Engine 000: Avg prompt throughput: 236.5 tokens/s, Avg generation throughput: 69.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rINFO 01-04INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
97
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
98
+ INFO 01-04 14:06:03 [loggers.py:111] Engine 000: Avg prompt throughput: 247.4 tokens/s, Avg generation throughput: 75.4 tokens/s, Running: 1 reqs, WaitinINFO 01-04 14:06:05 [loggers.py:111] Engine 000: Avg pINFO 01-04 INFO: 1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:06:13 [loggers.py:111] Engine 000: Avg prompt throughput: 174.4 tokens/s, Avg generation throughput: 59.7 tokens/s, Running: 1INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
99
+ INFO 01-04 14:INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
100
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
101
+ INFO 01-04 14:06:23 [INFO: 10.46.17.192:0 - "POST /v1/completions HINFO 01-04 14:06:26 [loggers.py:111] Engine 000: Avg prompt throughput: 397.3 tokens/s, Avg generation throughput: 59.9 tokens/s, Running: 2 reqs, INFO 01-04 14:06:33 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
102
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
103
+ INFO 01-04 14:06:36 [loggers.py:111] Engine 000: Avg prompt throughput: 168.3 tokenINFO 01-04 14:06:43 [loggers.py:111] Engine 000: Avg prompt throughput: 245.7 tokens/s, INFO 01-04 14:06:45 [loggers.py:111] Engine 000INFO 01-04 14:06:46 [loggers.py:111] Engine 000: Avg prompt throughput: 227.7 tokensINFO: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 20INFO:INFO 01-04 14:06:52 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:06:53 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:06:56 [loggers.py:111] Engine 000: Avg prompt throughput: 285.1 tokens/s, AvINFO 01-04 14:07:02 [loggers.py:111] Engine 000: Avg prompt throughput: 178.4 tokens/s, Avg generation throughput: 126.1 tokens/sINFO 01-04 14:07:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO: 10.46.50.192:0 - "POST /v1/coINFO 01-04 14:07:06 [loggerINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 14:07:13 [loggers.py:111] Engine 000: Avg prompt throughput: 213.5 tokens/s, Avg generation throughput: 12.2 tokens/s, INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
104
+ INFO 01-04 14:07:16 [loggINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO 01-04 14:07:23 [loggers.py:111] Engine 000: Avg prompt throughput: 244.0 tokens/s, Avg generation throughput: 20.6 tokens/s,INFO 01-04 14:07:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:07:33 [loggers.py:111] Engine 000: Avg prompt throughput: 210.2 tokens/s, Avg generation throughput: 70.1 tokensINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
106
+ INFO 01-04 14:07:36 [loggers.py:111] Engine 000: Avg prompt throughput: 504.4 tokens/s, Avg geINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
107
+ INFO 01-04 14:07:43 [loggers.py:111] Engine 000: Avg prompt throughput: 274.2 tokens/s, Avg generation throughput: 65.4 tokeINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
108
+ INFO 01-04 14:07:46 [loggers.INFO 01-04 14:07:53 [loggers.py:111] Engine 000: Avg prompt throughput: 247.1 tokens/s, Avg generation throughput: 79.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 13.0%
109
+ INFO: 10.46.17.192:0 - "POST /v1INFO 01-04 14:07:56 [loggers.pINFO 01-04 14:08:03 [loggers.py:111] Engine 000: Avg prompt throughput: 368.9 tokens/s, Avg generation throughput: 57.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%INFO: 10.46.17.192:0 - "POSINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
110
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
111
+ INFO 01-04 14:08:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.4 tokenINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
112
+ INFO 01-04 14:08:16 [loggersINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO 01-04 14:08:23 [loggers.py:111] Engine 000: Avg prompt throughput: 596.5 tokens/s, Avg generation throughput: 45.5 tokeINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
114
+ INFO 01-04 14:08:26 [loggers.pyINFO 01-04 14:08:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%,INFO 01-04 14:08:36 [loggers.py:111] Engine 000: Avg prompt throughput: 405.1 tokens/s, Avg geneINFO 01-04 14:08:43 [loggers.py:111] Engine 000: Avg prompt throughput: 305.0 tINFO 01-04 14:08:42 [loggers.py:111] Engine 000: Avg prompt throughput: 281.7 tokens/s, Avg generation throughput: 77.6 tokens/s, Running: 1INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
115
+ INFO 01-04 14:08:53 [loggers.py:111] Engine 000: Avg prompt throughput: 318.9 tokens/s, Avg generation throughput: 78.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%,INFO: 10.43.30.4:0 - "POSTINFO: 10.45.190.192:0 - "POST INFO 01-04 14:08:56 [loggers.py:1INFO 01-04 14:09:03 [loggers.py:111] Engine 000: Avg prompt throughput: 235.9 tokens/s, Avg generation throughput: 56.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO 01-04 14:09:06 [loggers.py:11INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
116
+ INFO 01-04 14:09:13 [loggers.py:111] Engine 000: Avg prompt throughput: 435.6 tokens/s, Avg generation throughput: 70.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1INFO 01-04 14:09:16 [loggers.py:111] Engine 000: Avg prompt throughput: 186.5 tokens/s, Avg generatiINFO 01-04 14:09:23 [loggers.py:111] Engine 000: Avg prompt throughput: 333.5 tokens/s, Avg generation throughput: 15.INFO 01-04 14:09:26 [loggers.py:111] Engine 000: Avg prompt throughput: 240.7 tokens/s, Avg generatioINFO 01-04 14:09:33 [loggers.py:111] Engine 000: Avg prompt throughput: 391.4 tokens/s, Avg generation throughput: 58INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
117
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
118
+ INFO 01-04 14:09:36 [loggers.py:111] EngiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
119
+ INFO 01-04 14:09:43 [loggers.py:111] Engine 000: Avg prompt throughput: 360.5 tokens/s, Avg generation throuINFO 01-04 14:09:46 [loggers.py:111] Engine 000: Avg prompt throughput: 673.4 tokens/s, Avg generation throughpINFO 01-04 14:09:53 [loggers.py:111] Engine 000: Avg prompt throughput: 263.7 tokens/s, Avg generation throughput: 47.8 tokens/s, Running:INFO 01-04 14:09:52 [loggers.py:111INFO: 10.45.190.192:0 - "POST /v1/completiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
120
+ INFO 01-04 14:10:03 [loggers.py:111] Engine 000: Avg prompt throughput: 377.1 tokens/s, Avg generation throughput: 82.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 21.2%
121
+ INFO: 10.45.1INFO 01-04 14:10:06 [loggers.py:111] Engine 000: AINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
122
+ INFO 01-04 14:10:13 [loggers.py:111] Engine 000: Avg prompt throughput: 199.5 tokens/s, Avg generation tINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
123
+ INFO 01-04 14:10:16 [loggers.py:111] Engine 000:INFO 01-04 14:10:23 [loggers.py:111] Engine 000: Avg prompt throughput: 370.4 tokens/s, Avg generation throughput: 74.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 22.0%
124
+ INFO: 10.46.5INFO 01-04 14:10:26 [loggers.py:111] Engine 000: INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
125
+ INFO 01-04 14:10:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:10:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
126
+ INFO 01-04 14:10:43 [loggers.py:111] Engine 000: Avg prompt throughput: 376.7 tokens/s, Avg generation INFO 01-04 14:10:46 [loggers.py:111] Engine 000: Avg prompt throughput: 138.2 tokens/s, Avg generation throughput: 2INFO 01-04 14:10:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation tINFO 01-04 14:10:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4INFO 01-04 14:11:03 [loggers.py:111] Engine 000: Avg prompt throughput: 245.0 tokens/s, Avg generation throughput: 33.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFO 01-04 14:11:06 [loggers.py:111] Engine 000: Avg INFO 01-04 14:11:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 20.8%
127
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
128
+ INFO 01-04 14:11:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 23.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, INFO 01-04 14:11:26 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:11:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RuINFO 01-04 14:11:46 [loggers.pyINFO 01-04 14:11:46 [loggers.py:111] Engine 000: Avg prompt throughput: 303.1 tokens/s, Avg generation throughput: 26.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
129
+ INFO 01-04 14:11:56 [loggerINFO 01-04 14:11:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 9.6 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OINFO 01-04 14:12:06 [loggINFO 01-04 14:12:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, PrefINFO 01-04 14:12:16 [logINFO 01-04 14:12:16 [loggers.py:111] Engine 000: Avg prompt throughput: 424.1 tokens/s, Avg generation throughput: 31.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, PrefINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
130
+ INFO 01-04 14:12:26 [loggeINFO 01-04 14:12:26 [loggers.py:111] Engine 000: Avg prompt throughput: 757.5 tokens/s, Avg generation throughput: 80.4 tokens/s, RuINFO 01-04 14:12:32 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt throughput: 400.8 tokens/s, Avg INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
131
+ INFO 01-04 14:12:36 [loggers.py:111] Engine 000: Avg prompt INFO: INFO 01-04 14:12:42 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughput: 319.9 tokens/s, AINFO 01-04 14:12:46 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:12:52 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:12:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:12:56 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:13:02 [loggers.py:111] Engine 000: Avg prompt throughput: 260.8 tokens/s, Avg generation throughput: 58.2 tokeINFO 01-04 14:13:06 [INFO 01-04 14:13:06 [loggers.py:111] Engine 000: Avg prompt throughput: 406.1 tokens/s, Avg generation throughput: 49.6 tokens/s, Running: 2 INFO 01-04 14:13:12 [loggers.py:111] Engine 000: Avg prompt throughput: 260.2INFO 01-04 14:13:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rateINFO 01-04 14:13:22 [loggers.py:111] Engine 000: Avg pINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
132
+ INFO 01-04 14:13:INFO 01-04 14:13:26 [loggers.py:111] Engine 000: Avg prompt throughput: 187.4 toINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
133
+ INFO 01-04 14:13:32 [loggers.py:111] Engine 000: Avg INFO 01-04 14:13:36INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
134
+ INFO 01-04 14:13:36 [loggers.py:111] Engine 000: Avg prompt throughput: 645.0 tokINFO 01-04 14:13:42 [loggers.py:111] Engine 000: AvgINFO 01-04 14:13:46 [loggers.py:111] Engine 000: Avg prompt throughput: 186.9 tokens/s, Avg generation throughput: 76.2 tokens/s, Running: 2 reqs, WaitINFO: 10.46.INFO 01-04 14:13:52 [loggers.py:111] Engine 000: AvgINFO 01-04 14:13:56 [loggers.py:111] Engine 000: Avg prompt throughput: 350.4 tokens/s, Avg generation throughput: 118.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPINFO 01-04 14:14:02 [loggers.py:111] Engine 000: Avg INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
135
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
136
+ INFO: 10.46.50.192:0 - "POST /v1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
137
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
138
+ INFO 01-04 14:14:12 [loggers.py:111] Engine 000: Avg prompt throughput: 354.7 tokens/s, Avg generation throughput: 159.INFO 01-04 14:14:16 [loggers.py:111] Engine 000: Avg prompt throughput: 367.6 tokINFO 01-04 14:14:16 [loggers.py:111] Engine 000: Avg prompt throughput: 797.4 tokens/INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
139
+ INFO 01-04 14:14:22 [loggers.py:111] Engine 000: AvgINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
140
+ INFO 01-04 14:14:26 [loggers.py:111] Engine 000: Avg prompt throughput: 309.4 tokens/s, Avg generation throughput: 64.5 tokens/s, Running: 1 reINFO 01-04 14:14:26 [loINFO: 10.46.50.192:0 - "POST /v1/completions HTTPINFO 01-04 14:14:36 [loggers.py:111] Engine 000: Avg prompt throughput: 372.2 tokens/s, Avg generation throughput: 90.5 tokens/s, Running: 2 rINFO 01-04 14:14:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO: 10INFO 01-04 14:14:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokeINFO 01-04 14:14:46 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:14:46 [lINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
141
+ INFO 01-04 14:14:52 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:14:56 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:14:56 [loggers.py:111] Engine 000: Avg prompt throughput: 1015.6 tokens/sINFO 01-04 14:15:02 [loggers.py:111] Engine 000: Avg prompt throughput: 346.2 tokens/s, Avg generation throughput: 108.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, INFO 01-04 14:15:06 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:15:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg INFO 01-04 14:15:12 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:15:16 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:15:16 [loggers.py:111] Engine 000: Avg prompt throughput: 905.5 tokens/s, Avg generation throughput: 88.9 tokens/s, Running: 3 reqs, INFO: 1INFO 01-04 14:15:22 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:15:26 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
142
+ INFO 01-04 14:15:26 [loggers.py:111] Engine 000: Avg prompt throughput: 227.3 tokenINFO: INFO 01-04 14:15:32 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 69.6 tokens/s, RunningINFO 01-04 14:15:36 [loggers.py:111] Engine 000: Avg prompt throughput: 372.0 tokeINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
143
+ INFO 01-04 14:15:46 [loggers.py:111] Engine 000: Avg prompt throughput: 422.2 tokens/s, Avg generation throughput: 52.1 tokens/s, RunniINFO 01-04 14:15:46 [loggers.py:111] Engine 000: Avg prompt throughput: 430.3 tokensINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
144
+ INFO 01-04 14:15:56 [loggers.py:111] Engine 000: Avg prompt throughput: 471.9 tokens/s, Avg generation throughput: 66.6 tokens/s, Running: 1 reqs, Waiting: 0 INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 2INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
145
+ INFO 01-04 14:16:06 [loggers.py:111] Engine 000: Avg prompt throughput: 286.2 tokens/s, Avg generation throughput: 78.7 tokens/s, RunnINFO 01-04 14:16:06 [loggINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 2INFO 01-04 14:16:16 [loggers.py:111] Engine 000: Avg prompt throughput: 508.3 tokens/s, Avg generation throughput: 75.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cINFO 01-04 14:16:16 [INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
146
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
147
+ INFO 01-04 14:16:26 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:16:26 [loggers.py:111] Engine 000: Avg prompt throughput: 454.6 tokens/s, Avg generation throughput: 114.2 tokens/s, Running: 3 reqs, WINFO: INFO 01-04 14:16:32 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:16:36 [loggers.py:111] Engine 000: Avg prompt throughput: 510.4 tokens/s, Avg generation throughput: 51.3 tokens/s, RunningINFO 01-04 14:16:36 [logINFO 01-04 14:16:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.4 tokeINFO 01-04 14:16:46 [loggers.py:111] Engine 000: Avg prompt throughput: 534.8 tokens/s, Avg generation throughput: 38.8 tokens/s, Running: 1 reqs, Waiting: 0 reqsINFO 01-04 14:16:52 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:16:56 [loggers.py:111] Engine 000: Avg prompt throughput: 561.4 tokens/s, Avg generation throughput: 50.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 15.6%
148
+ INFO: INFO 01-04 14:17:02 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:17:06 [loggers.py:111] Engine 000: Avg prompt throughput: 556.6 tokens/s, Avg generation throughput: 83.4 tokens/s, Running: 2 reqs, Waiting: 0 reqsINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
149
+ INFO 01-04 14:17:16 [loggers.py:111] Engine 000: Avg prompt throughput: 648.5 tokens/s, Avg generation throughput: 93.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cacheINFO 01-04 14:17:INFO: INFO 01-04 14:17:22 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 525.0 tokens/s, Avg generation throughput: 81.9 tokens/s, RunINFO 01-04 14:17:26 [loggers.py:111] Engine 000: Avg prompt throughput: 724.7 tokens/sINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
150
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
151
+ INFO 01-04 14:17:36 [loggers.py:111] Engine 000: Avg prompt throughput: 236.9 tokens/s, Avg generation throughput: 59.5 tokens/s, RunnINFO 01-04 14:17:36 [loggers.py:111] Engine 000: Avg prompt throughput: 389.6 tokens/INFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.8 tokens/s, RunninINFO 01-04 14:17:46 [loggers.py:111] Engine 000: Avg prompt throughput: 882.6 tokenINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
152
+ INFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 583.1 tokens/s, Avg generation throughput: 59.7 tokens/s, Running: INFO 01-04 14:17:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokeINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO 01-04 14:18:06 [loggers.py:111] Engine 000: Avg prompt throughput: 516.2 tokens/s, Avg generation throughput: 74.6 tokens/s, RunninINFO 01-04 14:18:06 [logINFO 01-04 14:18:12 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:18:16 [loggers.py:111] Engine 000: Avg prompt throughput: 388.1 tokens/s, Avg generation throughput: 108.3 tokens/s, Running: 3 reqs, Waiting: 0 rINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
154
+ INFO 01-04 14:18:22 [loggers.py:111] Engine 000: Avg prompt thINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
155
+ INFO 01-04 14:18:26 [loggers.py:111] Engine 000: Avg prompt throughput: 305.7 tokens/s, Avg generation throughput: 93.2 tokens/s, Running: 2 reqs, Waiting: 0 rINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200INFO 01-04 14:18:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hitINFO 01-04 14INFO: INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
156
+ INFO 01-04 14:18:46 [loggers.py:111] Engine 000: Avg prompt throughput: 945.5 tokens/s, Avg generation throughput: 112.7 tokens/s, Running: 2 reqs, Waiting: 0 INFO 01-04 14:18:52 [loggers.py:111] Engine 000: Avg prompt throughput: 192.7 tokens/s, Avg generation throughput: 63.7 tokenINFO 01-04 14:18:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 1 reqs, Waiting: 0 reINFO 01-04 14:19:02 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:19:06 [loggers.py:111] Engine 000: Avg prompt throughput: 605.3 tokens/s, Avg generation throughput: 83.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 17.7%
157
+ INFO 01-04 14:19:16 [loggers.py:111] Engine 000: Avg prompt throughput: 734.9 tokens/s, Avg generation throughput: 103.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.6%, Prefix cache hit rate: 17.2%
158
+ INFO 01-04 14:19:26 [loggers.py:111] Engine 000: Avg prompt throughput: 665.1 tokens/s, Avg generation throughput: 129.9 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.7%, Prefix cache hit rate: 16.8%
159
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
160
+ INFO 01-04 14:19:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 151.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.1%, Prefix cache hit rate: 16.8%
161
+ INFO: INFO 01-04 14:19:42 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:19:46 [loggers.py:111] Engine 000: Avg prompt throughput: 1336.2 tokens/s, Avg generation throughput: 130.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 11.0%, Prefix cache hit rate: 18.3%
162
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
163
+ INFOINFO 01-04 14:19:52 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:19:56 [loggers.py:111] Engine 000: Avg prompt throughput: 443.6 tokens/s, Avg generation throughput: 117.0 tokens/s, Running: 3 reqs, WaitingINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OININFO 01-04 14:20:02 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:20:06 [loggers.py:111] Engine 000: Avg prompt throughput: 542.3 tokens/s, Avg generation throughput: 106.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.9%, Prefix cache hitINFO 01-04 14IINFO 01-04 14:20:12 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:20:16 [loggers.py:111] Engine 000: Avg prompt throughput: 610.3 tokens/s, Avg generation throughput: 114.6 tokens/s, Running: 3 reqs, WaitinINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
164
+ INFO 01-04 14:20:22 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
165
+ INFO 01-04 14:20:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO 01-04 14:20:32 [loggers.py:111] Engine 000: Avg prompt throughput: 778.7 tokens/s, Avg generation throughput: 60.1 tokens/INFO 01-04 14:20:36 [loggers.py:111] Engine 000: Avg prompt throughput: 1211.9 tokens/s, Avg generation throughput: 69.9 tokens/s, Running: 3 reqs, Waiting:INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
166
+ INFO 01-04 14:20:42 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
167
+ INFO 01-04 14:20:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0INFO 01-04 14:2INFO 01-04 14:20:52 [loggers.py:111] Engine 000: Avg prompt throughput: 664.6 tokens/s, Avg generation throughput: 58.0 tokens/s, INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
168
+ INFO 01-04 14:20:56 [loggers.py:111] Engine 000: Avg prompt throughput: 568.1 tokens/s,INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
169
+ INFO 01-04 14:21:02 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:21:06 [loggers.py:111] Engine 000: Avg prompt throughput: 353.6 tokens/s, Avg generation throughput: 71.0 tokens/s, Running: INFO 01-04 1INFO 01-04 14:21:12 [loggers.py:111] Engine 000: Avg prompt throughput: 610.5 tokens/s, Avg generation throughput: 28.6 tokens/s, RunnINFO 01-04 14:21:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.INFO: 1INFO 01-04 14:21:22 [loggers.py:111] Engine 000: Avg prompt throughput: 632.5 tokens/s, Avg generation throughput: 69.1 tokens/s, RuINFO 01-04 14:21:26 [loggers.py:111] Engine 000: Avg prompt throughput: 1078.0 tokens/s, Avg generation throughput: 91.3 tokens/s, Running: 3 reqs, WaitiINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
170
+ INFO 01-04 14:21:32 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ INFO 01-04 14:21:36 [loggers.py:111] Engine 000: Avg prompt throughput: 444.3 tokens/sINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
172
+ INFO 01-04 14:21:42 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:21:46 [loggers.py:111] Engine 000: Avg prompt throughput: 616.4 tokens/s, Avg generation throughput: 87.4 tokens/s, Running: 3 reqs, WaINFO 01-04 14:21:52 [loggers.py:111] Engine 000: Avg prompt throughput: 477.0 tokens/s, Avg generation throughput: 50.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 15.5%
173
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
174
+ INFO 01-04 14:22:02 [loggers.py:111] Engine 000: Avg prompt throughput: 818.0 tokens/s, Avg generation throughput: 81.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: 15.2%
175
+ INFO 01-04 14:22:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.5%, Prefix cache hit rate: 15.2%
176
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
177
+ INFO 01-04 14:22:22 [loggers.py:111] Engine 000: Avg prompt throughput: 302.9 tokens/s, Avg generation throughput: 91.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.5%, Prefix cache hit rate: 15.0%
178
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTPINFO 01-04 14INFO 01-04 14:22:32 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 55.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hitINFO 01-04 14INFO 01-04 14:22:42 [loggers.py:111] Engine 000: Avg prompt throughput: 846.2 tokens/s, Avg generation throughput: 76.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.1%, Prefix cache hit rate: 14.7%
179
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTPINFO 01-04 14INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
180
+ INFO 01-04 14:22:52 [loggers.py:111] Engine 000: Avg prompt throughput: 737.4 tokens/s, Avg generation throughput: 73.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hINFO 01-04 14:22:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0INFO 0INFO 01-04 14:23:02 [loggers.py:111] Engine 000: Avg prompt throughput: 610.5 tokens/s, Avg generation throughput: 33.9 tokens/s, RunninINFO 01-04 14:23:06 [loggers.py:111] Engine 000: Avg prompt throughput: 437.0INFO 0INFO 01-04 14:23:12 [loggers.py:111] Engine 000: Avg prompt throughput: 462.1 tokens/s, Avg generation throughput: 60.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cacheINFO 01-04INFO 01-04 14:23:16 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:23:22 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 56.1 tokens/s, Running:INFO 01-04 14:23:26 [loggers.py:111] Engine 000: Avg prompt throughput: 820.2 tokens/s, Avg generation throughput: 18.6 tokens/s, Running: 1 reqINFINFO 01-04 14:23:32 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:23:36 [loggers.py:111] Engine 000: Avg prompt throughput: 706.9 tokens/s, Avg generation throughput: 61.3 tokens/s, Running: 2 reqs, INFO 01-04 14:23:42 [loggers.py:111] Engine 000: Avg prompt throughput: 827.2 tokens/s, Avg generation throughput: 62.7 tokens/s, RunninINFO 01-04 14:23:46 [loggers.py:111] Engine 000: Avg prompt throughput: 702.2 tokenINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
181
+ INFO 01-04 14:23:52 [loggers.py:111] Engine 000: Avg prompt throughput: 886.0 tokens/s, Avg generation throughput: 64.8 tokens/s, RunnINFO 01-04 14:23:56 [loggers.py:111] Engine 000: Avg prompt throughput: 677.2 tokens/INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
182
+ INFO 01-04 14:24:02 [loggers.py:111] Engine 000: Avg prompt throughput: 616.8 tokens/s, Avg generation throughput: 86.3 tokens/s, RuINFO 01-04 14:24:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 52.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 1INFO INFO 01-04 14:24:16 [loggers.py:111] Engine 000: Avg prompt throughput: 617.8 tokens/s, Avg generation throughput: 76.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.7%, Prefix cache hit rate: 1INFO IINFO 01-04 14:24:22 [loggers.py:111] Engine 000: Avg prompt throuINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO 01-04 14:24:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:24:32 [loggers.py:111] Engine 000: Avg prompt throughput: 735.9 tokens/s, Avg generation throughput: 66.0 tokens/s, RINFO 01-04 14:24:36 [loggers.py:111] Engine 000: Avg prompt throughput: 931.7 tokens/s, INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
184
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
185
+ IINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
186
+ INFO 01-04 14:24:46 [loggers.py:111] Engine 000: Avg prompt throughput: 700.4 tokens/s, Avg generation throughput: 50.5 tokens/s, Running: 1 reqs, WaINFO 01-04 14:24:52 [loggers.py:111] Engine 000: Avg prompt throughputINFO 01-04 14:24:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, WaitINFO 01-04 14:25:02 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, RunningINFO 01-04 14:25:06 [loggers.py:111] Engine 000: Avg prompt throughput: 744.9 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
187
+ INFO 01-04 14:25:12 [loggers.py:111] Engine 000: Avg prompt throughput:INFO 01-04 14:25:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1012.7 tokens/s, Avg generation throughput: 63.6 tokens/s, Running: 2 reqs, INFO 01-04 14:25:22 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:25:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.6%, Prefix cache hit rate: 17INFOINFO 01-04 14:25:36 [loggers.py:111] Engine 000: Avg prompt throughput: 1362.4 tokens/s, Avg generation throughput: 119.0 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 13.4%, Prefix cache hit rate:INFO 01-04 14:25:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 2 rINFO 01-04 14:25:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
188
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
189
+ INFO 01-04 14INFO 01-04 14:25:56 [loggers.py:111] Engine 000: Avg prompt throughput: 559.5 tokens/s, Avg generation throughput: 107.6 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.8%, Prefix cache hiINFO 01-04 14:26:02 [loggers.py:111] Engine 000: Avg prompt throughput: 777.3 tokens/s, Avg generation throughput: 58.6 tokens/s, Running: 2 reqs, INFO 01-04 14:26:06 [loggers.py:111] Engine 000: Avg prompt throughput: INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
190
+ INFO 01-04 14:26:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokenINFO 01-04 14:26:16 [loggers.py:111] Engine 000: Avg prompt throughput: 872.4 tokens/s, Avg generation throughput: 66.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.6%, Prefix cache INFO 01-04 14:26INFO 01-04 14:26:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 88.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.0%, Prefix cacheINFO 01-04 14:26:32 [loggers.py:111] Engine 000: Avg prompt throughput: 806.4 tokensINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
191
+ INFO 01-04 14:26:36 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
192
+ INFO 01-04 14:26:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, INFO 01-04 14:26:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, RunINFO 01-04 14:26:52 [loggers.py:111] Engine 000: Avg prompt throughput: 355.6 tokens/sINFO 01-04 14:26:56 [loggers.py:111] Engine 000: Avg prompt throughput: 741.0 tokens/s, Avg generation throughput: 54.7 tokens/s, RunINFO 01-04 14:27:02 [loggers.py:111] Engine 000: Avg prompt throughput: 874.8 tokens/sINFO 01-04 14:27:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.8%, Prefix cache hit rate: 19.9%
193
+ INFO: 10.45.190.192:0 - "POST /v1/completiINFO 01-04 14:27:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
194
+ INFO 01-04 14:27:16 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:27:22 [loggers.py:111] Engine 000: Avg prompt throughput: 818.3 tokens/s, Avg generation throughput: 22.2 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:27:26 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:27:32 [loggers.py:111] Engine 000: Avg prompt throughput: 627.7 tokens/s, Avg generation throughput: 58.7 tokens/s, Running: 2 reqs, Waiting: INFO 01-04 14:27:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, RuINFO 01-04 14:27:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 53.6 tokens/s, Running: 1 reqs, Waiting:INFO 01-04 14:27:46 [loggers.py:111] Engine 000: Avg prompt throINFO 01-04 14:27:52 [loggers.py:111] Engine 000: Avg prompt throughput: 842.7 tokens/s, Avg generation throughput: 84.9 tokens/s, Running: 2 reqs, Waiting:INFO 01-04 14:27:56 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
195
+ INFO 01-04 14:28:02 [loggers.py:111] Engine 000: Avg prompt throughput: 771.6 tokens/s, Avg generation throughput: 84.7 tokens/s, Running: 2 reqs, WaitiINFO 01-04 14:28:06 [loggers.py:111] Engine 000: Avg prompt throughINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
196
+ INFO 01-04 14:28:12 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO 01-04 14:28:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 44.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, PrefiINFO 01-04 14:28:22 [loggers.py:111] Engine 000: Avg prompt throughput: 355.3 tokens/s, INFO 01-04 14:28:26 [loggers.py:111] Engine 000: Avg prompt throughput: 619.1 tokens/s, Avg generation throughput: 58.5 tokens/s, RINFO 01-04 14:28:32 [loggers.py:111] Engine 000: Avg prompt throughput: 799.2 tokens/s, INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
197
+ INFO 01-04 14:28:36 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
198
+ INFO 01-04 14:28:42 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:28:46 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:28:52 [loggers.py:111] Engine 000: Avg prompt throughput: 551.0 tokens/s, AvgINFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, RuINFO 01-04 14:28:56 [loggers.py:111] Engine 000: Avg prompt throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
199
+ INFO 01-04 14:29:02 [INFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.6 tokens/s, RunniINFO 01-04 14:29:06 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:29:12 [INFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunninINFO 01-04 14:29:16 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:29:22 [INFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throughput: 714.5 tokens/s, Avg generation throughput: 24.3 tokens/s, RunINFO 01-04 14:29:26 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:29:32 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AINFO 01-04 14:29:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 23.1 tokens/s, RuINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
200
+ INFO 01-04 14:29:42 [INFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RunninINFO 01-04 14:29:46 [loggers.py:111] Engine 000: Avg prompt thrINFO 01-04 14:29:52 INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
201
+ INFO 01-04 14:30:06 [loggers.py:111] Engine 000: Avg prompt throughput: 902.4 tokens/s, Avg generation throughput: 27.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.2%
202
+ INFO 01-04 14:30:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 19.2%
203
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
204
+ INFO 01-04 14:30:46 [loggers.py:111] Engine 000: Avg prompt throughput: 865.1 tokens/s, Avg generation throughput: 23.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
205
+ INFO 01-04 14:30:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.9%
206
+ INFO 01-04 14:31:16 [loggers.py:111] Engine 000: Avg prompt throughput: 895.3 tokens/s, Avg generation throughput: 44.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 18.6%
207
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
208
+ INFO 01-04 14:31:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 25.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
209
+ INFO 01-04 14:31:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.6%
210
+ INFO 01-04 14:31:46 [loggers.py:111] Engine 000: Avg prompt throughput: 647.1 tokens/s, Avg generation throughput: 23.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 18.4%
211
+ INFO 01-04 14:31:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 18.4%
212
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
213
+ INFO 01-04 14:32:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
214
+ INFO 01-04 14:32:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.4%
215
+ INFO 01-04 14:32:26 [loggers.py:111] Engine 000: Avg prompt throughput: 633.5 tokens/s, Avg generation throughput: 20.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 18.2%
216
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
217
+ INFO 01-04 14:32:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 28.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
218
+ INFO 01-04 14:32:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.2%
219
+ INFO 01-04 14:33:06 [loggers.py:111] Engine 000: Avg prompt throughput: 468.8 tokens/s, Avg generation throughput: 29.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.0%, Prefix cache hit rate: 18.0%
220
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
221
+ INFO 01-04 14:33:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
222
+ INFO 01-04 14:33:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.0%
223
+ INFO 01-04 14:33:46 [loggers.py:111] Engine 000: Avg prompt throughput: 709.4 tokens/s, Avg generation throughput: 15.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 17.8%
224
+ INFO 01-04 14:33:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.8%
225
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
226
+ INFO 01-04 14:34:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 21.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
227
+ INFO 01-04 14:34:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.8%
228
+ INFO 01-04 14:35:06 [loggers.py:111] Engine 000: Avg prompt throughput: 415.5 tokens/s, Avg generation throughput: 18.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 17.7%
229
+ INFO 01-04 14:35:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 17.7%
230
+ INFO 01-04 14:35:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 17.7%
231
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
232
+ INFO 01-04 14:35:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
233
+ INFO 01-04 14:35:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.7%
234
+ INFO 01-04 14:36:16 [loggers.py:111] Engine 000: Avg prompt throughput: 1200.1 tokens/s, Avg generation throughput: 17.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 17.3%
235
+ INFO 01-04 14:36:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 17.3%
236
+ INFO 01-04 14:36:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.2%, Prefix cache hit rate: 17.3%
237
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
238
+ INFO 01-04 14:36:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 16.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.3%
239
+ INFO 01-04 14:36:56 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.3%
240
+ INFO 01-04 14:37:06 [loggers.py:111] Engine 000: Avg prompt throughput: 1476.1 tokens/s, Avg genINFO 01-04 14:37:15 [loggers.py:111] Engine 000: Avg prompt throughput: 995.5 tokens/s, Avg generation throughput: 3.7 tokenINFO 01-04 14:37:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geneINFO 01-04 14:37:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 4INFO 01-04 14:38:06 [loggers.py:111] Engine 000: Avg prompt throughput: 834.8 tokens/s, Avg generation throughput: 6.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 14.9%
241
+ INFO 01-04 14:38:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 14.9%
242
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
243
+ INFO 01-04 14:38:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatioINFO 01-04 14:39:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1156.2 tokens/s, Avg generation throughput: 2.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 17.7%
244
+ INFO 01-04 14:39:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46INFO 01-04 14:39:56 [loggers.py:111] Engine 000: Avg prompt throughput: 666.3 tokens/s, Avg generation throughput: 37.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 15.6%
245
+ INFO 01-04 14:40:06 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 15.6%
246
+ INFO 01-04 14:40:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GINFO 01-04 14:41:35 [loggers.py:111] Engine 000: Avg prompt throughput: 626.1 tokens/s, Avg generation throughput: 13.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 17.5%
247
+ INFO 01-04 14:41:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 17.5%
248
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
249
+ INFO 01-04 14:41:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.5%
250
+ INFO 01-04 14:42:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokenINFO 01-04 14:43:16 [loggers.py:111] Engine 000: Avg prompt throughput: 719.8 tokens/s, Avg generation throughput: 5.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 17.5%
251
+ INFO 01-04 14:43:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.5%
252
+ INFO 01-04 14:43:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 17.5%
253
+ INFO 01-04 14:43:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 reqsINFO 01-04 14:45:05 [loggers.py:111] Engine 000: Avg prompt throughput: 1882.9 tokens/s, Avg generation throughput: 35.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.5%, Prefix cache hit rate: 16.9%
254
+ INFO 01-04 14:45:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.7%, Prefix cache hit rate: 16.9%
255
+ INFO 01-04 14:45:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.9%, Prefix cache hit rate: 16.9%
256
+ INFO 01-04 14:45:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.5 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:46:09 [loggers.py:111] Engine 000: Avg prompt throughput: 463.5 tokens/s, Avg generation throughput: 15.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 16.6%
257
+ INFO 01-04 14:46:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 16.6%
258
+ INFO 01-04 14:46:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 16.6%
259
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
260
+ INFO 01-04 14:46:39 [loggers.py:111] Engine 000: Avg promINFO 01-04 14:48:45 [loggers.py:111] Engine 000:INFO 01-04 14:49:33 [loggers.py:111] Engine 000: Avg prompt throughput: 1074.6 tokens/s, Avg generation throughput: 45.6 tokens/INFO 01-04 14:59:23 [loggers.py:111] Engine 000: Avg prompt throughput: 471.3 tokens/s, Avg generation throughput: 1.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 13.3%
261
+ INFO 01-04 14:59:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 13.3%
262
+ INFO 01-04 14:59:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 13.3%
263
+ INFO 01-04 14:59:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO 01-04 15:10:35 [loggers.py:111] Engine 000: Avg prompt throughput: 561.0 tokens/s, Avg generation throughput: 8.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 17.1%
264
+ INFO 01-04 15:10:45 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 17.1%
265
+ INFO 01-04 15:10:55 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 17.1%
266
+ INFO 01-04 15:11:05 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 17.1%
267
+ INFO 01-04 15:11:15 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 17.1%
268
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
269
+ INFO 01-04 15:11:25 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.1%
270
+ INFO 01-04 15:11:35 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.1%
271
+ 6.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
272
+ INFO 01-04 15:04:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.6%
273
+ INFO 01-04 15:04:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.6%
274
+ 1-04 15:01:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 13.3%
275
+ INFO 01-04 15:01:43 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 13.3%
276
+ INFO 01-04 15:01:53 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.6%, Prefix cache hit rate: 13.3%
277
+ INFO 01-04 15:02:03 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.8%, Prefix cache hit rate: 13.3%
278
+ INFO 01-04 15:02:13 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 45.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.0%, Prefix cache hit rate: 13.3%
279
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
280
+ INFO 01-04 15:02:23 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 6.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.3%
281
+ INFO 01-04 15:02:33 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.3%
282
+ ] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 30.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
283
+ INFO 01-04 14:58:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 17.0%
hf_ip/vllm_gpu7.log ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
2
+ warnings.warn(
3
+ INFO 01-04 13:13:45 [__init__.py:239] Automatically detected platform cuda.
4
+ INFO 01-04 13:13:48 [api_server.py:1043] vLLM API server version 0.8.5
5
+ INFO 01-04 13:13:48 [api_server.py:1044] args: Namespace(host='127.0.0.1', port=8008, uvicorn_log_level='info', disable_uvicorn_access_log=False, allow_credentials=False, allowed_origins=['*'], allowed_methods=['*'], allowed_headers=['*'], api_key=None, lora_modules=None, prompt_adapters=None, chat_template=None, chat_template_content_format='auto', response_role='assistant', ssl_keyfile=None, ssl_certfile=None, ssl_ca_certs=None, enable_ssl_refresh=False, ssl_cert_reqs=0, root_path=None, middleware=[], return_tokens_as_token_ids=False, disable_frontend_multiprocessing=False, enable_request_id_headers=False, enable_auto_tool_choice=False, tool_call_parser=None, tool_parser_plugin='', model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', task='auto', tokenizer=None, hf_config_path=None, skip_tokenizer_init=False, revision=None, code_revision=None, tokenizer_revision=None, tokenizer_mode='auto', trust_remote_code=True, allowed_local_media_path=None, load_format='auto', download_dir=None, model_loader_extra_config={}, use_tqdm_on_load=True, config_format=<ConfigFormat.AUTO: 'auto'>, dtype='auto', max_model_len=131072, guided_decoding_backend='auto', reasoning_parser=None, logits_processor_pattern=None, model_impl='auto', distributed_executor_backend=None, pipeline_parallel_size=1, tensor_parallel_size=1, data_parallel_size=1, enable_expert_parallel=False, max_parallel_loading_workers=None, ray_workers_use_nsight=False, disable_custom_all_reduce=False, block_size=None, gpu_memory_utilization=0.9, swap_space=4, kv_cache_dtype='auto', num_gpu_blocks_override=None, enable_prefix_caching=None, prefix_caching_hash_algo='builtin', cpu_offload_gb=0, calculate_kv_scales=False, disable_sliding_window=False, use_v2_block_manager=True, seed=None, max_logprobs=20, disable_log_stats=False, quantization=None, rope_scaling=None, rope_theta=None, hf_token=None, hf_overrides=None, enforce_eager=False, max_seq_len_to_capture=8192, tokenizer_pool_size=0, tokenizer_pool_type='ray', tokenizer_pool_extra_config={}, limit_mm_per_prompt={}, mm_processor_kwargs=None, disable_mm_preprocessor_cache=False, enable_lora=None, enable_lora_bias=False, max_loras=1, max_lora_rank=16, lora_extra_vocab_size=256, lora_dtype='auto', long_lora_scaling_factors=None, max_cpu_loras=None, fully_sharded_loras=False, enable_prompt_adapter=None, max_prompt_adapters=1, max_prompt_adapter_token=0, device='auto', speculative_config=None, ignore_patterns=[], served_model_name=['default-model'], qlora_adapter_name_or_path=None, show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, disable_async_output_proc=False, max_num_batched_tokens=None, max_num_seqs=256, max_num_partial_prefills=1, max_long_partial_prefills=1, long_prefill_token_threshold=0, num_lookahead_slots=0, scheduler_delay_factor=0.0, preemption_mode=None, num_scheduler_steps=1, multi_step_stream_outputs=True, scheduling_policy='fcfs', enable_chunked_prefill=None, disable_chunked_mm_input=False, scheduler_cls='vllm.core.scheduler.Scheduler', override_neuron_config=None, override_pooler_config=None, compilation_config=None, kv_transfer_config=None, worker_cls='auto', worker_extension_cls='', generation_config='auto', override_generation_config=None, enable_sleep_mode=False, additional_config=None, enable_reasoning=False, disable_cascade_attn=False, disable_log_requests=True, max_log_len=None, disable_fastapi_docs=False, enable_prompt_tokens_details=False, enable_server_load_tracking=False)
6
+ INFO 01-04 13:13:56 [config.py:717] This model supports multiple tasks: {'classify', 'generate', 'score', 'reward', 'embed'}. Defaulting to 'generate'.
7
+ INFO 01-04 13:13:56 [config.py:2003] Chunked prefill is enabled with max_num_batched_tokens=8192.
8
+ /usr/local/lib/python3.10/dist-packages/torch/utils/_pytree.py:185: FutureWarning: optree is installed but the version is too old to support PyTorch Dynamo in C++ pytree. C++ pytree support is disabled. Please consider upgrading optree using `python3 -m pip install --upgrade 'optree>=0.13.0'`.
9
+ warnings.warn(
10
+ INFO 01-04 13:14:01 [__init__.py:239] Automatically detected platform cuda.
11
+ INFO 01-04 13:14:04 [core.py:58] Initializing a V1 LLM engine (v0.8.5) with config: model='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', speculative_config=None, tokenizer='/data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=None, served_model_name=default-model, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=True, chunked_prefill_enabled=True, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={"level":3,"custom_ops":["none"],"splitting_ops":["vllm.unified_attention","vllm.unified_attention_with_output"],"use_inductor":true,"compile_sizes":[],"use_cudagraph":true,"cudagraph_num_of_warmups":1,"cudagraph_capture_sizes":[512,504,496,488,480,472,464,456,448,440,432,424,416,408,400,392,384,376,368,360,352,344,336,328,320,312,304,296,288,280,272,264,256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],"max_capture_size":512}
12
+ WARNING 01-04 13:14:05 [utils.py:2522] Methods determine_num_available_blocks,device_config,get_cache_block_size_bytes,initialize_cache not implemented in <vllm.v1.worker.gpu_worker.Worker object at 0x7f5053f564d0>
13
+ INFO 01-04 13:14:06 [parallel_state.py:1004] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
14
+ INFO 01-04 13:14:06 [cuda.py:221] Using Flash Attention backend on V1 engine.
15
+ WARNING 01-04 13:14:06 [topk_topp_sampler.py:69] FlashInfer is not available. Falling back to the PyTorch-native implementation of top-p & top-k sampling. For the best performance, please install FlashInfer.
16
+ INFO 01-04 13:14:06 [gpu_model_runner.py:1329] Starting to load model /data/minimax-dialogue/users/ado/072025project/02verifiable/ckpt/rl_roleplay_1210_6-gmm1/global_step_160/hf...
17
+
18
+
19
+
20
+
21
+
22
+
23
+
24
+
25
+
26
+
27
+
28
+
29
+
30
+
31
+
32
+
33
+
34
+ INFO 01-04 13:15:40 [loader.py:458] Loading weights took 94.24 seconds
35
+ INFO 01-04 13:15:41 [gpu_model_runner.py:1347] Model loading took 61.0562 GiB and 94.501715 seconds
36
+ INFO 01-04 13:15:55 [backends.py:420] Using cache directory: /root/.cache/vllm/torch_compile_cache/75e72335d9/rank_0_0 for vLLM's torch.compile
37
+ INFO 01-04 13:15:55 [backends.py:430] Dynamo bytecode transform time: 14.44 s
38
+ INFO 01-04 13:15:59 [backends.py:136] Cache the graph of shape None for later use
39
+ INFO 01-04 13:16:49 [backends.py:148] Compiling a graph for general shape takes 53.17 s
40
+ INFO 01-04 13:17:53 [monitor.py:33] torch.compile takes 67.61 s in total
41
+ INFO 01-04 13:17:54 [kv_cache_utils.py:634] GPU KV cache size: 254,768 tokens
42
+ INFO 01-04 13:17:54 [kv_cache_utils.py:637] Maximum concurrency for 131,072 tokens per request: 1.94x
43
+ INFO 01-04 13:18:33 [gpu_model_runner.py:1686] Graph capturing finished in 40 secs, took 1.21 GiB
44
+ INFO 01-04 13:18:33 [core.py:159] init engine (profile, create kv cache, warmup model) took 172.44 seconds
45
+ INFO 01-04 13:18:33 [core_client.py:439] Core engine process 0 ready.
46
+ WARNING 01-04 13:18:33 [config.py:1239] Default sampling parameters have been overridden by the model's Hugging Face generation config recommended from the model creator. If this is not intended, please relaunch vLLM instance with `--generation-config vllm`.
47
+ INFO 01-04 13:18:33 [serving_chat.py:118] Using default chat sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
48
+ INFO 01-04 13:18:33 [serving_completion.py:61] Using default completion sampling params from model: {'temperature': 0.6, 'top_k': 20, 'top_p': 0.95}
49
+ INFO 01-04 13:18:33 [api_server.py:1090] Starting vLLM API server on http://127.0.0.1:8008
50
+ INFO 01-04 13:18:33 [launcher.py:28] Available routes are:
51
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /openapi.json, Methods: GET, HEAD
52
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs, Methods: GET, HEAD
53
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /docs/oauth2-redirect, Methods: GET, HEAD
54
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /redoc, Methods: GET, HEAD
55
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /health, Methods: GET
56
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /load, Methods: GET
57
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /ping, Methods: GET, POST
58
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /tokenize, Methods: POST
59
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /detokenize, Methods: POST
60
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/models, Methods: GET
61
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /version, Methods: GET
62
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/chat/completions, Methods: POST
63
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/completions, Methods: POST
64
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/embeddings, Methods: POST
65
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /pooling, Methods: POST
66
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /score, Methods: POST
67
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/score, Methods: POST
68
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/audio/transcriptions, Methods: POST
69
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /rerank, Methods: POST
70
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v1/rerank, Methods: POST
71
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /v2/rerank, Methods: POST
72
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /invocations, Methods: POST
73
+ INFO 01-04 13:18:33 [launcher.py:36] Route: /metrics, Methods: GET
74
+ INFO: Started server process [1319238]
75
+ INFO: Waiting for application startup.
76
+ INFO: Application startup compleIINFO 01-04 14:00:14 [loggers.py:111] Engine 000: Avg prompt throughput: 53.6 tokens/s, Avg generation throughput: 29.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.3%, Prefix cache hit rate: 0.0%
77
+ INFO 01-04 14:00:24 [loggers.py:111] Engine 000: Avg prompt throughput: 57.7 tokens/s, Avg generation throughput: 61.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.8%, Prefix cache hit rate: 1.4%
78
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
79
+ INFO 01-04 14:00:34 [loggers.py:111] Engine 000: Avg prompt throughput: 58.8 tokens/s, Avg generation throughput: 98.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 1.9%
80
+ INFO: 10.45.190.192:0 - "GET /v1/models HTTP/1.1" 200 OK
81
+ INFO 01-04 14:00:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 93.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 1.9%
82
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
83
+ INFO 01-04 14:00:54 [loggers.py:111] Engine 000: Avg prompt throughput: 64.9 tokens/s, Avg generation throughput: 71.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 2.1%
84
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
85
+ INFO 01-04 14:01:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 84.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 2.1%
86
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
87
+ INFO 01-04 14:01:14 [loggers.py:111] Engine 000: Avg prompt throughput: 75.3 tokens/s, Avg generation throughput: 74.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 2.1%
88
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
89
+ INFO 01-04 14:01:24 [loggers.py:111] Engine 000: Avg prompt throughput: 71.1 tokens/s, Avg generation throughput: 50.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 2.1%
90
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
91
+ INFO 01-04 14:01:34 [loggers.py:111] Engine 000: Avg prompt throughput: 86.2 tokens/s, Avg generation throughput: 12.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 2.1%
92
+ INFO 01-04 14:01:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 2.1%
93
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
94
+ INFO 01-04 14:01:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 17.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 2.1%
95
+ INFO 01-04 14:02:04 [loggers.py:111] Engine 000: Avg prompt throughput: 90.2 tokens/s, Avg generation throughput: 33.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 2.0%
96
+ INFO 01-04 14:02:14 [loggers.py:111] Engine 000: Avg prompt throughput: 110.2 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 1.7%
97
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
98
+ INFO 01-04 14:02:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 1.7%
99
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
100
+ INFO 01-04 14:02:34 [loggers.py:111] Engine 000: Avg prompt throughput: 163.2 tokens/s, Avg generation throughput: 73.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 1.6%
101
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
102
+ INFO 01-04 14:02:44 [loggers.py:111] Engine 000: Avg prompt throughput: 80.9 tokens/s, Avg generation throughput: 107.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.9%, Prefix cache hit rate: 1.4%
103
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
104
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
105
+ INFO 01-04 14:02:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 60.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 1.4%
106
+ INFO 01-04 14:03:04 [loggers.py:111] Engine 000: Avg prompt throughput: 110.1 tokens/s, Avg generation throughput: 5.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 1.3%
107
+ INFO 01-04 14:03:14 [loggers.py:111] Engine 000: Avg prompt throughput: 124.2 tokens/s, Avg generation throughput: 65.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 1.1%
108
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
109
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
110
+ INFO 01-04 14:03:24 [loggers.py:111] Engine 000: Avg prompt throughput: 132.8 tokens/s, Avg generation throughput: 85.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.6%, Prefix cache hit rate: 1.1%
111
+ INFO 01-04 14:03:34 [loggers.py:111] Engine 000: Avg prompt throughput: 138.8 tokens/s, Avg generation throughput: 80.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 1.0%
112
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
113
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
114
+ INFO 01-04 14:03:44 [loggers.py:111] Engine 000: Avg prompt throughput: 90.8 tokens/s, Avg generation throughput: 51.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.4%, Prefix cache hit rate: 1.1%
115
+ INFO 01-04 14:03:54 [loggers.py:111] Engine 000: Avg prompt throughput: 120.3 tokens/s, Avg generation throughput: 63.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 1.1%
116
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
117
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
118
+ INFO 01-04 14:04:04 [loggers.py:111] Engine 000: Avg prompt throughput: 119.8 tokens/s, Avg generation throughput: 80.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.5%, Prefix cache hit rate: 5.9%
119
+ INFO 01-04 14:04:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 48.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.7%, Prefix cache hit rate: 5.9%
120
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
121
+ INFO 01-04 14:04:24 [loggers.py:111] Engine 000: Avg prompt throughput: 267.0 tokens/s, Avg generation throughput: 68.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 5.1%
122
+ INFO 01-04 14:04:34 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 94.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 5.1%
123
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
124
+ INFO 01-04 14:04:44 [loggers.py:111] Engine 000: Avg prompt throughput: 283.8 tokens/s, Avg generation throughput: 114.5 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 4.6%
125
+ INFO 01-04 14:04:54 [loggers.py:111] Engine 000: Avg prompt throughput: 182.6 tokens/s, Avg generation throughput: 139.4 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.7%, Prefix cache hit rate: 4.2%
126
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
127
+ INFO 01-04 14:05:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 145.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 4.2%
128
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
129
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
130
+ INFO 01-04 14:05:14 [loggers.py:111] Engine 000: Avg prompt throughput: 99.6 tokens/s, Avg generation throughput: 91.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.1%, Prefix cache hit rate: 7.6%
131
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
132
+ INFO 01-04 14:05:24 [loggers.py:111] Engine 000: Avg prompt throughput: 202.0 tokens/s, Avg generation throughput: 96.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 13.6%
133
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
134
+ INFO 01-04 14:05:34 [loggers.py:111] Engine 000: Avg prompt throughput: 140.5 tokens/s, Avg generation throughput: 121.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 12.9%
135
+ INFO 01-04 14:05:44 [loggers.py:111] Engine 000: Avg prompt throughput: 241.2 tokens/s, Avg generation throughput: 123.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 12.0%
136
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
137
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
138
+ INFO 01-04 14:05:54 [loggers.py:111] Engine 000: Avg prompt throughput: 211.4 tokens/s, Avg generation throughput: 96.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 11.3%
139
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
140
+ INFO 01-04 14:06:04 [loggers.py:111] Engine 000: Avg prompt throughput: 294.4 tokens/s, Avg generation throughput: 93.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 10.4%
141
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
142
+ INFO 01-04 14:06:14 [loggers.py:111] Engine 000: Avg prompt throughput: 236.0 tokens/s, Avg generation throughput: 112.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 15.1%
143
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
144
+ INFO 01-04 14:06:24 [loggers.py:111] Engine 000: Avg prompt throughput: 114.3 tokens/s, Avg generation throughput: 91.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 14.7%
145
+ INFO 01-04 14:06:34 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.6 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 14.7%
146
+ INFO 01-04 14:06:44 [loggers.py:111] Engine 000: Avg prompt throughput: 377.1 tokens/s, Avg generation throughput: 144.7 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.9%, Prefix cache hit rate: 14.6%
147
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
148
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
149
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
150
+ INFO 01-04 14:06:54 [loggers.py:111] Engine 000: Avg prompt throughput: 166.6 tokens/s, Avg generation throughput: 148.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 15.3%
151
+ INFO 01-04 14:07:04 [loggers.py:111] Engine 000: Avg prompt throughput: 126.1 tokens/s, Avg generation throughput: 131.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.9%
152
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
153
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
154
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
155
+ INFO 01-04 14:07:14 [loggers.py:111] Engine 000: Avg prompt throughput: 242.7 tokens/s, Avg generation throughput: 82.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 14.2%
156
+ INFO 01-04 14:07:24 [loggers.py:111] Engine 000: Avg prompt throughput: 207.8 tokens/s, Avg generation throughput: 82.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 14.9%
157
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
158
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
159
+ INFO 01-04 14:07:34 [loggers.py:111] Engine 000: Avg prompt throughput: 277.3 tokens/s, Avg generation throughput: 50.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 14.1%
160
+ INFO 01-04 14:07:44 [loggers.py:111] Engine 000: Avg prompt throughput: 213.5 tokens/s, Avg generation throughput: 48.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 13.6%
161
+ INFO 01-04 14:07:54 [loggers.py:111] Engine 000: Avg prompt throughput: 329.5 tokens/s, Avg generation throughput: 91.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 12.9%
162
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
163
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
164
+ INFO 01-04 14:08:04 [loggers.py:111] Engine 000: Avg prompt throughput: 215.0 tokens/s, Avg generation throughput: 92.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 13.3%
165
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
166
+ INFO 01-04 14:08:14 [loggers.py:111] Engine 000: Avg prompt throughput: 189.1 tokens/s, Avg generation throughput: 96.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 12.9%
167
+ INFO 01-04 14:08:24 [loggers.py:111] Engine 000: Avg prompt throughput: 292.2 tokens/s, Avg generation throughput: 99.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 16.0%
168
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
169
+ INFO 01-04 14:08:34 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 124.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 16.0%
170
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
171
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
172
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
173
+ INFO 01-04 14:08:44 [loggers.py:111] Engine 000: Avg prompt throughput: 254.9 tokens/s, Avg generation throughput: 87.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.4%
174
+ INFO 01-04 14:08:54 [loggers.py:111] Engine 000: Avg prompt throughput: 200.4 tokens/s, Avg generation throughput: 39.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 15.0%
175
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
176
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
177
+ INFO 01-04 14:09:04 [loggers.py:111] Engine 000: Avg prompt throughput: 256.8 tokens/s, Avg generation throughput: 22.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
178
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
179
+ INFO 01-04 14:09:14 [loggers.py:111] Engine 000: Avg prompt throughput: 539.6 tokens/s, Avg generation throughput: 51.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.5%, Prefix cache hit rate: 13.5%
180
+ INFO 01-04 14:09:24 [loggers.py:111] Engine 000: Avg prompt throughput: 264.5 tokens/s, Avg generation throughput: 59.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 13.1%
181
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
182
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
183
+ INFO 01-04 14:09:34 [loggers.py:111] Engine 000: Avg prompt throughput: 244.4 tokens/s, Avg generation throughput: 45.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.0%, Prefix cache hit rate: 12.7%
184
+ INFO 01-04 14:09:44 [loggers.py:111] Engine 000: Avg prompt throughput: 398.8 tokens/s, Avg generation throughput: 57.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 12.1%
185
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
186
+ INFO 01-04 14:09:54 [loggers.py:111] Engine 000: Avg prompt throughput: 362.1 tokens/s, Avg generation throughput: 101.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 11.6%
187
+ INFO: INFO 01-04 14:09:57 [loggers.py:111] Engine 000: Avg proINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
188
+ INFO 01-04 14:10:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generaINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
189
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.INFO 01-04 14:10:14 [loggers.py:111] Engine 000: Avg prompt throughput: 448.8 tokens/s, Avg generation throughput: 19.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 11.1%
190
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
191
+ INFO 01-04 14:10:24 [loggers.py:111] Engine 000: Avg prompt throughput: 281.9 tokens/s, Avg generation throughput: 58.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.3%, Prefix cache hit rate: 10.8%
192
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
193
+ INFO 01-04 14:10:34 [loggers.py:111] Engine 000: Avg prompt throughput: 286.5 tokens/s, Avg generation throughput: 31.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 10.5%
194
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
195
+ INFO 01-04 14:10:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.5%
196
+ INFO 01-04 14:10:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.5%
197
+ INFO 01-04 14:11:04 [loggers.py:111] Engine 000: Avg prompt throughput: 267.6 tokens/s, Avg generation throughput: 21.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 10.3%
198
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
199
+ INFO 01-04 14:11:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.4 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.3%
200
+ INFO 01-04 14:11:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 10.3%
201
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
202
+ INFO 01-04 14:11:34 [loggers.py:111] Engine 000: Avg prompt throughput: 397.6 tokens/s, Avg generation throughput: 34.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 9.9%
203
+ INFO 01-04 14:11:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0INFO 01-04 14:11:46 [loggers.pyINFO 01-04 14:11:54 [loggers.py:111] Engine 000: Avg prompt throughput: 305.7 tokens/s, Avg generation throughput: 2.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.2%, Prefix cache hit rate: 9.6%
204
+ INFO 01-04 14:12:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 9.6%
205
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
206
+ INFO 01-04 14:12:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: INFO 01-04 14:12:16 [loggers.py:11INFO 01-04 14:12:24 [loggers.py:111] Engine 000: Avg prompt throughput: 527.5 tokens/s, Avg generation throughpINFO 01-04 14:12:29 [loggers.py:111] Engine 000: Avg prompt throughput: 189.1 tokens/s, Avg generationINFO 01-04 14:12:37 [loggers.py:111] Engine 000: Avg prompt throughput: 322.6 tokens/s, Avg generation throughput: 1INFO 01-04 14:12:39 [loggers.py:111] Engine 000: Avg prompt throughput: 528.0 tokens/s, Avg generation INFO 01-04 14:12:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage:INFO 01-04 14:12:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:12:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:12:59 [loggers.py:111] EnINFO 01-04 14:13:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usaINFO 01-04 14:13:09 [loggers.py:111] EnINFO 01-04 14:13:17 [loggers.py:111] Engine 000: Avg prompt throughput: 507.0 tokens/s, Avg generation throughput: 19.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usINFO 01-04 14:13:19 [loggers.py:111] EngINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
207
+ INFO 01-04 14:13:27 [loggers.py:111] Engine 000: Avg prompt throughput: 577.2 tokens/s, Avg generation throughput: 46.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.9%
208
+ INFO 01-04 14:13:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 15.9%
209
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
210
+ INFO 01-04 14:13:47 [loggers.py:111] Engine 000: Avg prompt throughput: 950.4 tokens/s, Avg generation throughput: 58.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.9%, Prefix cache hit rate: 16.5%
211
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
212
+ INFO 01-04 14:13:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 87.6 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.5%
213
+ INFO: 10.43.30.3:0 -INFO 01-04 14:13:59 [loggers.py:111] EngINFO 01-04 14:14:07 [loggers.py:111] Engine 000: Avg prompt throughput: 286.2 tokens/s, Avg generation throughput: 27.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache uINFO: 10.43.30.4:0 - "POST /v1/compleINFO 01-04 14:14:17 [loggers.py:111] Engine 000: Avg prompt throughput: 391.1 tokens/s, Avg generation throughput: 70.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 17.9%
214
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
215
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
216
+ INFO 01-04 14:14:27 [loggers.py:111] Engine 000: Avg prompt throughput: 312.5 tokens/s, Avg generation throughput: 115.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.4%, Prefix cache hit rate: 17.5%
217
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
218
+ INFO 01-04 14:14:37 [loggers.py:111] Engine 000: Avg prompt throughput: 975.2 tokens/s, Avg generation throughput: 55.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO 01-04 14:14:39 [loggers.py:111] Engine 000: AvINFO 01-04 14:14:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 92.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.3%, Prefix cache hit rate: 16.5%
219
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
220
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
221
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
222
+ INFO 01-04 14:14:57 [loggers.py:111] Engine 000: Avg prompt throughput: 656.0 tokens/s, Avg generation throughput: 56.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.1%, Prefix cache hit rate: 17.6%
223
+ INFO 01-04 14:15:07 [loggers.py:111] Engine 000: Avg prompt throughput: 436.1 tokens/s, Avg generation throughput: 78.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 18.2%
224
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
225
+ INFO 01-04 14:15:17 [loggers.py:111] Engine 000: Avg prompt throughput: 444.8 tokens/s, Avg generation throughput: 100.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 17.7%
226
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
227
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
228
+ INFO 01-04 14:15:27 [loggers.py:111] Engine 000: Avg prompt throughput: 528.0 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 17.3%
229
+ INFO: 10.46.INFO: 10.43.30.3:0 - "POST /v1/completions HTTINFO 01-04 14:15:37 [loggers.py:111] Engine 000: Avg prompt throughput: 385.4 tokens/s, Avg generation throughput: 40.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.6%, Prefix cache hit rate: 16.9%
230
+ INFO 01-04 14:15:47 [loggers.py:111] Engine 000: Avg prompt throughput: 353.2 tokens/s, Avg generation throughput: 83.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 17.1%
231
+ INFO: INFO 01-04 14:15:49 [loggers.py:111] Engine 000: Avg prompINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
232
+ INFO 01-04 14:15:57 [loggers.py:111] Engine 000: Avg prompt throughput: 433.8 tokens/s, Avg generation throughput: 43.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.8%, Prefix cache hit rate: 18.6%
233
+ INFO 01-04 14:16:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.9%, Prefix cache hit rate: 18.6%
234
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
235
+ INFO 01-04 14:16:17 [loggers.py:111] Engine 000: Avg prompt throughput: 1148.9 tokens/s, Avg generation throughput: 66.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs,INFO 01-04 14:16:19 [loggers.py:111] Engine 000: Avg proINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
236
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
237
+ INFO 01-04 14:16:27 [loggers.py:111] Engine 000: Avg prompt throughput: 348.1 tokens/s, Avg geneINFO 01-04 14:16:29 [loggers.py:111] Engine 000: Avg prompt throughput: 559.7 tokens/s, Avg generation throughput: 106.4 toINFO 01-04 14:16:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generatINFO 01-04 14:16:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 134.1 tINFO 01-04 14:16:47 [loggers.py:111] Engine 000: Avg prompt throughput: 740.6 tokens/s, Avg generaINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
238
+ INFO 01-04 14:16:49 [loggers.py:111] Engine 000: Avg promINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
239
+ INFO 01-04 14:16:57 [loggers.py:111] Engine 000: Avg prompt throughput: 506.6 tokens/s, Avg generation throughput: 126.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.5%, Prefix cache hit rate: 18.2%
240
+ INFO: 10INFO 01-04 14:16:59 [loggers.py:111] Engine 000: AvgINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
241
+ INFO 01-04 14:17:07 [loggers.py:111] Engine 000: Avg prompt throughput: 421.8 tokens/s, Avg generation throughput: 88.2 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 17.9%
242
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
243
+ INFO: 10.46.17.192INFO 01-04 14:17:14 [loggers.py:111] Engine INFO 01-04 14:17:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation thINFO 01-04 14:17:19 [loggers.py:111] Engine 000: Avg prompt throughput: 6INFO 01-04 14:17:24 [loggers.py:111] EngiINFO 01-04 14:17:27 [loggers.py:111] Engine 000: Avg prompt throughput: 522.6 tokens/s, Avg generation throughput: 44.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 19.4%
244
+ INFO 01-04 14:17:37 [loggers.py:111] Engine 000: Avg prompt throughput: 908.0 tokens/s, Avg generation throughput: 92.2 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.2%, Prefix cache hit rate: 18.7%
245
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
246
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
247
+ INFO 01-04 14:17:47 [loggers.py:111] Engine 000: Avg prompt throughput: 459.1 tokens/s, Avg generation tINFO 01-04 14:17:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.INFO: 10.46.50.192:0 INFO 01-04 14:17:54 [loggers.py:111] EngiINFO 01-04 14:17:57 [loggers.py:111] Engine 000: Avg prompt throughput: 477.8 tokens/s, Avg generatINFO 01-04 14:17:59 [loggers.py:111] Engine 000: Avg prompt throughput: 1344.9 INFO 01-04 14:18:04 [loggers.py:111] EngiINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
248
+ INFO 01-04 14:18:07 [loggers.py:111] Engine 000: Avg prompt throughput: 594.1 tokens/s, Avg generation throughpuINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
249
+ INFO 01-04 14:18:14 [loggers.py:111] EnginINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
250
+ INFO 01-04 14:18:17 [loggers.py:1INFO 01-04 14:18:19 [loggers.py:111] Engine 000: Avg prompt throughput: 967.8 tINFO 01-04 14:18:24 [loggers.py:111] Engine 000: Avg prompt throughput: 480.5 tokens/s, Avg generation throINFO 01-04 14:18:27 [loggers.py:111] Engine 000: Avg prompt throughput: 594.8 tokens/s, Avg generatINFO 01-04 14:18:29 [loggers.py:111] Engine 000: Avg prompt throughput: 537.9 toINFO: 10.46.50.192:0 - "POST /v1/comINFO 01-04 14:18:37 [loggers.py:111] Engine 000: Avg prompt throughput: 747.7 tokens/s, Avg generation throughput: 128.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.3%, Prefix cache hit rate: 17.6%
251
+ INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
252
+ INFO: 10.46.50.192:0 - "INFO 01-04 14:18:44 [loggers.py:111] Engine 000: Avg prompt throughput: 414.9 tokens/s, Avg generationINFO 01-04 14:18:47 [loggers.py:111INFO 01-04 14:18:49 [loggers.py:111] Engine 000: Avg prompt throughput: 554.6 tokeINFO 01-04 14:18:54 [loggers.py:111] Engine 000: Avg prompt throughput: 334.6 tokens/s, Avg generationINFO 01-04 14:18:57 [loggers.py:111INFO 01-04 14:18:59 [loggers.py:111] Engine 000: Avg prompt throughput: 750.9 tokens/s, Avg generation throughput: 62.4 tokens/s, Running: 2 reqs, WaINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
253
+ INFO 01-04 14:19:04 [loggers.py:111INFO: 10.45.190.192:0 - "POST /INFO 01-04 14:19:09 [loggers.py:111] Engine 000: Avg prompt throughput: 679.8 tokens/s, Avg generation throughput: 68.7 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:19:14 [loggers.py:111] Engine 000: Avg prompt throughput: 277.5 tokens/s, Avg generatioINFO 01-04 14:19:17 [loggers.py:111] Engine 000: Avg prompt throughput: 412.3 tokens/s, Avg generaINFO 01-04 14:19:19 INFO 01-04 14:19:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 137.4 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
254
+ INFO 01-04 14:19:27 [loggers.pyINFO 01-04 14:19:INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
255
+ INFO 01-04 14:19:34 [loggers.py:111] Engine 000: Avg prompt throughput: 609.1 tokens/s, Avg generation INFO 01-04 14:19:37 [loggers.py:1INFO 01-04 14:19:39 [loggers.py:111] Engine 000: Avg prompt throughput: 327.6 tokens/s, Avg generation throughput: 53.4 tokens/s, Running: 2 reqs, WINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
256
+ INFOINFO 01-04 14:19:49 [loggers.py:111] Engine 000: Avg prompt throughput: 630.0 tokens/s, Avg generation throughput: 88.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.1%, Prefix cache hit rate: INFO: INFO: 10.43.30.5:0 - "POST /v1INFO: 10.46.17.192:0 - "POINFO 01-04 14:19:59 [loggers.py:111] Engine 000: Avg prompt throughput: 704.9 tokens/s, Avg generation throughput: 98.9 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.7%, Prefix cache hit rate: 11.INFO 01-04 14:20:04 [loggers.py:111] Engine 000: Avg prompt throughpuINFO: 10.43.30.5:0 - "POST /v1INFO 01-04 14:20:07 [loggers.pINFO 01-04 14:20:09 [loggers.py:111] Engine 000: Avg prompt throughput: 534.9 tokens/sINFO 01-04 14:20:14 [loggers.py:111] Engine 000: Avg prompt throughput: 453.0 tokens/s, Avg generation INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
257
+ INFO 01-04 14:20:17 [loggers.pINFO 01-04 14:20:19 [loggers.py:111] Engine 000: Avg prompt throughput: 646.6 tokens/INFO 01-04 14:20:24 [loggers.py:111] Engine 000: Avg prompt throughput: 617.5 tokens/s, Avg generation throughput: 75.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KVINFO 01-04 14:20:27 [loggersINFO 01-04 14:20:29 [INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
258
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
259
+ INFO 01-04 14:20:34 [loggers.py:111] Engine 000: Avg prompt throughput: 482.5 tokens/s, Avg generation INFO 01-04 14:20:37 [loggers.INFO 01-04 14:20:39 [loggers.py:111] Engine 000: Avg prompt throughput: 420.4 tokens/s,INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
260
+ INFO 01-04 14:20:44 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:20:49 [loggers.py:111] Engine 000: Avg prompt throughput: 451.3 tokens/s, Avg generation throughput: 53.8 tokens/s, Running: 2 reqs, WaitingINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OKINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
261
+ INFO 01-04 14:20:59 [loggers.py:111] Engine 000: Avg prompt throughput: 964.4 tokens/s, Avg generation throughput: 75.3 tokens/s, Running: 2 reqs, WaitingINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
262
+ IINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
263
+ INFO 01-04 14:21:09 [loggers.py:111] Engine 000: Avg prompt throughput: 697.4 tokens/s, Avg generation throughput: 104.3 tokens/s, Running: 2 reqs, WaINFO 01-04 14:21:14 [loggers.py:111] Engine 000: Avg prompt throughputINFO: 10.43.30.5:0 - "POST /vINFO 01-04 14:21:17 [loggers.pyINFO 01-04 14:21:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s,INFO 01-04 14:21:24 [loggers.py:111] Engine 000: Avg prompt throughput: 751.5 tokens/s, Avg generatioINFO 01-04 14:21:27 [loggers.py:111] Engine 000: Avg prompt throughput: 1347.5 tokens/s, Avg geINFO 01-04 14:21:29 [loggers.py:111] Engine 000: Avg prompt throughput: 184.8 tokens/s,INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
264
+ INFO 01-04 14:21:34 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:21:39 [loggers.py:111] Engine 000: Avg prompt throughput: 641.2 tokens/s, Avg generation throughput: 91.1 tokens/s, Running: 2 reqs, WaitINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
265
+ INFOINFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
266
+ INFO 01-04 14:21:49 [loggers.py:111] Engine 000: Avg prompt throughput: 711.2 tokens/s, Avg generation throughput: 100.0 tokens/s, Running: 2 reqs, WaiINFO 01-04 14:21:54 [loggers.py:111] Engine 000: Avg prompt throughpuINFO 01-04 14:21:59 [loggers.py:111] Engine 000: Avg prompt throughput: 577.5 tokens/s, Avg generation throughput: 101.3 tokens/s, Running: 3 reqs, WaINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
267
+ INFINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
268
+ INFO 01-04 14:22:09 [loggers.py:111] Engine 000: Avg prompt throughput: 222.2 tokens/s, Avg generation throughput: 118.2 tokens/s, Running: 3 reqs, WINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
269
+ INFOINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
270
+ INFO 01-04 14:22:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 120.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: INFO 0INFO 01-04 14:22:29 [loggers.py:111] Engine 000: Avg prompt throughput: 612.5 tokens/s, Avg generation throughput: 87.3 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 5.6%, Prefix cache hit rate: INFO 01-04 14:22:34 [loggers.py:111] Engine 000: Avg prompt throughput: INFO 01-04 14:22:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 100.2 tokens/s, Running: 2 reqs, WINFO 01-04 14:22:44 [loggers.py:111] Engine 000: Avg prompt throughput:INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
271
+ INFO 01-04 14:22:49 [loggers.py:111] Engine 000: Avg prompt throughput: 531.4 tokeINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
272
+ INFO 01-04 14:22:54 [loggers.py:111] Engine 000: Avg prompt throughput: 829.4 tokens/s, Avg generation throughput: 100.2 tokens/s, Running:INFO 01-04 14:22:59 [loggers.py:111] Engine 000: Avg prompt throughput: 568.1 tokINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
273
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
274
+ INFO 0INFO 01-04 14:23:09 [loggers.py:111] Engine 000: Avg prompt throughput: 657.1 tokens/s, Avg generation throughput: 116.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.9%, Prefix cache hit rate:INFO 01-04 14:23:14 [loggers.py:111] Engine 000: Avg prompt throughput: 74INFO 01-04 14:23:19 [loggers.py:111] Engine 000: Avg prompt throughput: 807.2 tokens/s, Avg generation throughput: 97.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.7%, Prefix cache hit rateINFO: INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.INFO 01-04INFO 01-04 14:23:29 [loggers.py:111] Engine 000: Avg prompt throughput: 713.3 tokens/s, Avg generation throughput: 133.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.7%, Prefix cache hitINFO 01-04 14INFO 01-04 14:23:39 [loggers.py:111] Engine 000: Avg prompt throughput: 404.1 tokens/s, Avg generation throughput: 138.4 tokens/s, Running: 4 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.9%, Prefix cache hit rate: 11.3%
275
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTINFO 01-04 14:INFO 01-04 14:23:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 141.0 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.3%, Prefix cache hitINFO 01-04 14:23:54 [loggers.py:111] Engine 000: Avg prompt throughput: 569.3 tINFO 01-04 14:23:59 [loggers.py:111] Engine 000: Avg prompt throughput: 609.9 tokens/s, Avg generation throughput: 138.9 tokens/s, Running: INFO 01-04 14:24:04 [loggers.py:111] Engine 000: Avg prompt throughput: 735.8 toINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
276
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
277
+ INFO 01-04 14:24:09 [loggers.py:111] Engine 000: Avg prompt throughput: 90INFO 01-04 14:24:14 [loggers.py:111] Engine 000: Avg prompt throughput: 849.5 tokens/s, Avg generation throughput: 98.8 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.2%, Prefix cache hit rate: INFO 01-04 14:24:19 [loggers.py:111] Engine 000: Avg prompt throughput: 652.3 tokens/s, Avg generation throughput: 67.9 tokens/s, RunninINFO 01-04 14:24:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/INFO 01-04 14:24:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 6.7%, Prefix cacINFO 01-04 14:24:34INFO 01-04 14:24:39 [loggers.py:111] Engine 000: Avg prompt throughput: 728.4 tokens/s, Avg generation throughput: 107.7 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 10.0%, Prefix cINFO: 10.46.50.19INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
278
+ INFO 01-04 14:24:49 [loggers.py:111] Engine 000: Avg prompt throughput: 478.7 tokens/s, Avg generation throughput: 140.1 tokens/s, Running: 3 reqs, Waiting: 0 reqs, GPU KV cache usage: 9.3%, PrefINFO 01-04 14:24:54 [loggers.py:111] Engine 000: Avg prompt throughput: 939.1 tokens/s, AINFO: 10INFO 01-04 14:24:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.INFO 01-04 14:24:59 [loggers.py:111] Engine 000: Avg prompt througINFO 01-04 14:25:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation INFO 01-04 14:25:07 [loggers.py:111] Engine 000: Avg INFO 01-04 14:25:09 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:25:14 [loggers.py:111] Engine 000: Avg prompt throughput: 712.9 tokens/s, Avg generation throughput: 25.6 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:25:19 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:25:24 [loggers.py:111] Engine 000: Avg prompt throughput: 599.7 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KINFO: 10.45.190.192:0 - "POST /v1/completions INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
279
+ INFO 01-04 14:25:34 [loggers.py:111] Engine 000: Avg prompt throughput: 511.0 tokens/s, Avg generation throughput: 76.0 tokens/s, Running: 2 reqs, Waiting: 0INFO 01-04 14:25:39 [loggers.py:111] Engine 000: Avg prompt thINFO 01-04 14:25:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 91.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.9%, Prefix cache hit rate: 11.5%
280
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
281
+ INFO 01-04 14:25:54 [loggers.py:111] Engine 000: Avg prompt throughput: 345.0 tokens/s, Avg generation throughput: 81.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.8%, Prefix cache hit rate: 11.4%
282
+ INFO: INFO 01-04 14:25:59 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:26:04 [loggers.py:111] Engine 000: Avg prompt throughput: 577.1 tokens/s, Avg generation throughput: 77.7 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KINFO 01-04 14:26:07 [loggers.py:111] Engine 000: AINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
283
+ INFO 01-04 14:26:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generationINFO 01-04 14:26:17 [loggers.py:111] Engine 000: Avg prompt throughput: 798.1 tokens/s, Avg generation throughput: 84.2 toINFO 01-04 14:26:19 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:26:24 [loggers.py:111] EINFO 01-04 14:26:27 [loggers.py:111] Engine 000: Avg prompt throughput: 589.4 tokens/s, Avg generation throughput: 119.4INFO 01-04 14:26:29 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:26:34 [loggers.py:111] Engine 000: Avg prompt throughput: 827.0 tokens/s, Avg generation throINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
284
+ INFO 01-04 14:26:37 [loggers.py:111] Engine 00INFO 01-04 14:26:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.5%, Prefix cache hit rate: 13.8%IINFO 01-04 14:26:54 [loggers.py:111] Engine 000: Avg prompt throughput: 975.8 tokens/s, Avg generation throughput: 71.5 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.6%, Prefix cache hit rate: 13.5%
285
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
286
+ INFO 01-04 14:27:04 [loggers.py:111] Engine 000: Avg prompt throughput: 809.6 tokens/s, Avg generation throughput: 100.1 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.46.17.192:0 - "POST /v1/completions HTINFO 01-04 14:27:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.8 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.8%, Prefix cache hit rate: 13.3%
287
+ INFO: 10.46.1INFO 01-04 14:27:17 [loggers.py:111] Engine 000: Avg prompt throughput: 825.7 tokens/s, Avg generation throughput: INFO 01-04 14:27:24 [loggers.py:111] Engine 000: Avg prompt throughput: 1172.5 tokens/s, Avg generation throughput: 41.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPINFOINFO 01-04 14:27:27 [loggers.py:111] Engine 000: INFO 01-04 14:27:34 [loggers.py:111] Engine 000: Avg prompt throughput: 805.6 tokens/s, Avg generation throughput: 61.9 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU INFO: 10.45.190.192:0 - "POST /v1/completions HINFO 01-04 14:27:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 89.4 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cache usage: 8.5%, Prefix cache hit rate: 12.7%
288
+ INFO: 10.46.INFO 01-04 14:27:47 [loggers.py:111] Engine 000: AINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
289
+ INFO 01-04 14:27:54 [loggers.py:111] Engine 000: Avg prompt throughput: 623.8 tokens/s, Avg generation INFO 01-04 14:27:57 [loggers.py:111] Engine 000: AINFO 01-04 14:27:59 [loggers.py:111] Engine 000: Avg prompt througINFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
290
+ INFO 01-04 14:28:04 [loggers.py:111] Engine 000: Avg prompt throughput: 420.2 tokens/s,INFO: 10.43.30INFO: 10.43.30.5:0 - "POST /v1/completionsINFO 01-04 14:28:09 [loggers.py:111] Engine 000: Avg prompt throughpINFO 01-04 14:28:14 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, WaitinINFO: 10.45.INFO 01-04 14:28:17 [loggers.py:111] Engine 000: AIINFO 01-04 14:28:19 [loggers.py:111] Engine 000: Avg prompt throuINFO 01-04 14:28:24 [loggers.py:111] Engine 000: Avg prompt throughput: 852.0 tokens/s, Avg generation INFO 01-04 14:28:27 [loggers.py:111] Engine 000: INFO 01-04 14:28:29 [loggers.py:111] Engine 000: Avg prompt throughINFO 01-04 14:28:34 [loggers.py:111] Engine 000: Avg prompt throughput: 901.2 tokens/s, Avg generation throughput: 75.6 tokens/s, Running: 2 reqs, WaitiINFO 01-04 14:28:39 [loggers.py:111] Engine 000: Avg INFO 01-04 14:28:45 [loggers.py:111] Engine 000: Avg prompt throughput: 1033.5 tokens/s, Avg generation throughput: 25.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache uINFO 01-04 14:28:47 [loggers.py:111] EngiINFO 01-04 14:28:55 [loggers.py:111] Engine 000: Avg prompt throughput: 620.4 tokens/s, Avg generation throughput: 48.3 tokens/s, Running: 2 reqs, Waiting: 0 reqs, GPU KV cachINFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
291
+ INFO 01-04 14:29:07 [loggers.py:111] Engine 000: Avg prompt throughput: 698.3 tokens/s, Avg generation throughput: 30.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
292
+ INFO 01-04 14:29:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.8%
293
+ INFO 01-04 14:29:27 [loggers.py:111] Engine 000: Avg prompt throughput: 818.5 tokens/s, Avg generation throughput: 28.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 18.5%
294
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
295
+ INFO 01-04 14:29:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 8.3 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 18.5%
296
+ INFO 01-04 14:29:47 [loggers.py:111] Engine 000: Avg prINFO 01-04 14:29:49 [loggers.py:111] Engine 000: Avg prompt throughput: 742.5 tokens/s, Avg generation throughput: 35.9 tokens/s, Running: 1 reqs, Waiting: 0 reqINFO 01-04 14:29:57 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:29:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs,INFO 01-04 14:30:07 [loggers.py:111] Engine 000: Avg proINFO 01-04 14:30:09 [loggers.py:111] Engine 000: Avg prompt throughput: 758.9 tokens/s, Avg generation throughput: 84.0 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO 01-04 14:30:17 [loggers.py:111] Engine 000: Avg prompINFO 01-04 14:30:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 90.4 tokens/s, Running: 2 reqs, Waiting: 0 reqINFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
297
+ INFO 01-04 14:30:27 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:30:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 58.5 tokens/s, Running: 1 reqs, Waiting: 0 reqINFO 01-04 14:30:37 [loggers.py:111] Engine 000: Avg prompINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
298
+ INFO 01-04 14:30:39 [loggers.py:111] Engine 000: Avg prompt throughput: 573.3 tokens/s, Avg INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
299
+ INFO 01-04 14:30:47 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:30:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 INFO 01-04 14:30:57 [loggers.py:111] Engine 000: Avg prompt throughput: 758.2 tokens/s, Avg generation throughput: 37.2 tokens/INFO 01-04 14:30:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg geINFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
300
+ INFO 01-04 14:31:07 [loggers.py:111] Engine 000: Avg promptINFO 01-04 14:31:09 [loggers.py:111] Engine 000: Avg prompt throughput: 778.2 tokens/s, Avg generation throughput: 27.9 tokens/s, Running: 1 reqs, Waiting: 0 INFO 01-04 14:31:17 [loggers.py:111] Engine 000: Avg prompt tINFO 01-04 14:31:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 rINFO 01-04 14:31:27 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:31:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.8 tokens/s, Running: 1 reqs, Waiting: 0 rINFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
301
+ INFO 01-04 14:31:37 [loggers.py:111] Engine 000: Avg prompt INFO 01-04 14:31:39 [loggers.py:111] Engine 000: Avg prompt throughput: 962.2 tokens/s, Avg generation throughput: 37.5 tokens/s, Running: 1 reqs, Waiting: 0INFO 01-04 14:31:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RINFO 01-04 14:31:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO 01-04 14:31:57 [loggers.py:111] Engine 000: Avg prompt throughput: 500.0 tokens/s, Avg generation throughput: 30.0 tokens/sINFO 01-04 14:31:59 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg gINFO 01-04 14:32:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/INFO 01-04 14:32:09 [loggers.py:111] Engine 000: Avg prompt throughput: 937.4 tokens/s, Avg INFO: 10.43.30.5:0 - "POST /v1/completions HTTP/1.1" 200 OK
302
+ INFO 01-04 14:32:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 36.5 tokens/s, INFO 01-04 14:32:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvgINFO 01-04 14:32:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, RINFO 01-04 14:32:29 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, AvINFO 01-04 14:32:47 [loggers.py:111] Engine 000: Avg prompt throughput: 708.9 tokens/s, Avg generation throughput: 33.6 tokens/sINFO 01-04 14:33:29 [loggers.py:111] Engine 000: Avg prompt throughput: 527.0 tokens/s, Avg generation throughput: 23.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 13.7%
303
+ INFO 01-04 14:33:39 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 13.7%
304
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
305
+ INFO 01-04 14:33:49 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 39.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.7%
306
+ INFO 01-04 14:33:59 [loggers.pINFO 01-04 14:34:17 [loggers.py:111] Engine 000: Avg prompt throughput: 988.6 tokens/s, Avg generation throughput: 31.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.0%, Prefix cache hit rate: 15.5%
307
+ INFO 01-04 14:34:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.2%, Prefix cache hit rate: 15.5%
308
+ INFO 01-04 14:34:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 4.4%, Prefix cache hit rate: 15.5%
309
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
310
+ INFO 01-04 14:34:47 [loggers.py:111] Engine 000: Avg prompt throughput: 591.2 tokens/s, Avg generation throughput: 22.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.4%
311
+ INFO 01-04 14:34:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage:INFO 01-04 14:35:09 [loggers.py:111] Engine 000: Avg prompt throughput: 581.7 tokens/s, Avg generation throughput: 14.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cINFO 01-04 14:35:14 [loggers.py:111] Engine 000INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
312
+ INFO 01-04 14:35:19 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation througINFO 01-04 14:35:24 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughpuINFO 01-04 14:35:29 [loggers.py:1INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
313
+ INFO 01-04 14:36:37 [loggers.py:111] Engine 000: Avg prompt throughput: 601.2 tokens/s, Avg generation throughput: 44.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
314
+ INFO 01-04 14:36:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
315
+ INFO 01-04 14:38:27 [loggers.py:111] Engine 000: Avg prompt throughput: 630.3 tokens/s, Avg generation throughput: 17.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 15.1%
316
+ INFO 01-04 14:38:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 15.1%
317
+ INFO 01-04 14:38:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.1%
318
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
319
+ INFO 01-04 14:38:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
320
+ INFO 01-04 14:39:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.1%
321
+ INFO 01-04 14:39:57 [loggers.py:111] Engine 000: Avg prompt throughput: 583.0 tokens/s, Avg generation throughput: 24.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 15.0%
322
+ INFO 01-04 14:40:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 15.0%
323
+ INFO: 10.46.17.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
324
+ INFO 01-04 14:40:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 43.2 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
325
+ INFO 01-04 14:40:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
326
+ INFO 01-04 14:41:57 [loggers.py:111] Engine 000: Avg prompt throughput: 609.1 tokens/s, Avg generation throughput: 45.4 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 14.8%
327
+ INFO 01-04 14:42:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 14.8%
328
+ INFO: 10.43.30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
329
+ INFO 01-04 14:42:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 1.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
330
+ INFO 01-04 14:42:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
331
+ INFO 01-04 14:42:47 [loggers.py:111] Engine 000: Avg prompt throughput: 720.5 tokens/s, Avg generation throughput: 32.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.0%, Prefix cache hit rate: 14.7%
332
+ INFO 01-04 14:42:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 46.9 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 14.7%
333
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
334
+ INFO 01-04 14:43:07 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 21.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.7%
335
+ INFO 01-04 14INFO 01-04 14:43:25 [loggers.py:111] Engine 000: Avg prompt throughput: 1802.9 tokens/s, Avg generation throughput: 20.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 7.2%, Prefix cacheINFO 01-04 14:47:27 [loggers.py:111] Engine 000: Avg prompt throughput: 773.2 tokens/s, Avg generation throughput: 38.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 15.4%
336
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
337
+ INFO 01-04 14:47:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 22.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.4%
338
+ INFO 01-04 14:47:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.4%
339
+ INFO 01-04 15:03:07 [loggers.py:111] Engine 000: Avg prompt throughput: 618.6 tokens/s, Avg generation throughput: 25.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 15.3%
340
+ INFO 01-04 15:03:17 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.7%, Prefix cache hit rate: 15.3%
341
+ INFO 01-04 15:03:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.9%, Prefix cache hit rate: 15.3%
342
+ INFO 01-04 15:03:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.1%, Prefix cache hit rate: 15.3%
343
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
344
+ INFO 01-04 15:03:47 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 37.9 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
345
+ INFO 01-04 15:03:57 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.3%
346
+ .30.3:0 - "POST /v1/completions HTTP/1.1" 200 OK
347
+ INFO 01-04 14:47:37 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 20.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usageINFO 01-04 14:48:45 [loggers.py:111] Engine 000: Avg prompt throughput: 814.9 tokens/s, Avg generation throughput: 33.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.3%, Prefix cache hit rate: 14.9%
348
+ INFO 01-04 14:48:55 [loggers.py:11INFO 01-04 14:54:17 [loggers.py:111] Engine 000: Avg prompt throughput: 420.7 tokens/s, Avg generation throughput: 10.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 1.7%, Prefix cache hit rate: 19.3%
349
+ INFO 01-04 14:54:27 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, RunINFO 01-04 14:58:09 [loggers.py:111] Engine 000: Avg prompt throughput: 454.0 tokens/s, Avg generation throughput: 16.0 tokens/s, Running: 1 reqs, Waiting: INFO 01-04 14:59:06 [loggers.py:111] Engine 000: Avg prompt throughput: 573.5 tokens/s, Avg generation throughput: 32.0 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 16.5%
350
+ INFO 01-04 14:59:16 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.6%, Prefix cache hit rate: 16.5%
351
+ INFO 01-04 14:59:26 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.7 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.8%, Prefix cache hit rate: 16.5%
352
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
353
+ INFO 01-04 14:59:36 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 19.5 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
354
+ INFO 01-04 14:59:46 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 16.5%
355
+ .8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.6%, Prefix cache hit rate: 13.5%
356
+ INFO: 10.46.50.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
357
+ INFO 01-04 14:50:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 10.8 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.5%
358
+ INFO 01-04 14:50:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 13.5%
359
+ 15.1 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
360
+ INFO 01-04 14:48:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 15.0%
361
+ INFO 01-04 14:59:34 [loggers.py:111] Engine 000: Avg prompt throughput: 579.9 tokens/s, Avg generation throughput: 14.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.3%, Prefix cache hit rate: 14.8%
362
+ INFO 01-04 14:59:44 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.3 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.5%, Prefix cache hit rate: 14.8%
363
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
364
+ INFO 01-04 14:59:54 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 26.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
365
+ INFO 01-04 15:00:04 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.8%
366
+ y:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 31.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.3%
367
+ INFO 01-04 14:47:11 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.3%
368
+ INFO 01-04 14:48:11 [loggers.py:111] Engine 000: Avg prompt throughput: 797.4 tokens/s, Avg generation throughput: 21.8 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.2%, Prefix cache hit rate: 14.1%
369
+ INFO 01-04 14:48:21 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.1 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 3.4%, Prefix cache hit rate: 14.1%
370
+ INFO: 10.45.190.192:0 - "POST /v1/completions HTTP/1.1" 200 OK
371
+ INFO 01-04 14:48:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 13.6 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
372
+ INFO 01-04 14:48:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.1%
373
+ INFO 01-04 14:57:21 [loggers.py:111] Engine 000: Avg prompt throughput: 554.8 tokens/s, Avg generation throughput: 2.2 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.2%, Prefix cache hit rate: 14.5%
374
+ INFO 01-04 14:57:31 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 47.5 tokens/s, Running: 1 reqs, Waiting: 0 reqs, GPU KV cache usage: 2.4%, Prefix cache hit rate: 14.5%
375
+ INFO: 10.43.30.4:0 - "POST /v1/completions HTTP/1.1" 200 OK
376
+ INFO 01-04 14:57:41 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 44.7 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
377
+ INFO 01-04 14:57:51 [loggers.py:111] Engine 000: Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 0 reqs, Waiting: 0 reqs, GPU KV cache usage: 0.0%, Prefix cache hit rate: 14.5%
hf_ip/vllm_instances.txt ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 127.0.0.1:8001
2
+ 127.0.0.1:8002
3
+ 127.0.0.1:8003
4
+ 127.0.0.1:8004
5
+ 127.0.0.1:8005
6
+ 127.0.0.1:8006
7
+ 127.0.0.1:8007
8
+ 127.0.0.1:8008
9
+ 127.0.0.1:8001
10
+ 127.0.0.1:8002
11
+ 127.0.0.1:8003
12
+ 127.0.0.1:8004
13
+ 127.0.0.1:8005
14
+ 127.0.0.1:8006
15
+ 127.0.0.1:8007
16
+ 127.0.0.1:8008
17
+ 127.0.0.1:8001
18
+ 127.0.0.1:8002
19
+ 127.0.0.1:8003
20
+ 127.0.0.1:8004
21
+ 127.0.0.1:8005
22
+ 127.0.0.1:8006
23
+ 127.0.0.1:8007
24
+ 127.0.0.1:8008
25
+ 127.0.0.1:8001
26
+ 127.0.0.1:8002
27
+ 127.0.0.1:8001
28
+ 127.0.0.1:8003
29
+ 127.0.0.1:8002
30
+ 127.0.0.1:8004
31
+ 127.0.0.1:8003
32
+ 127.0.0.1:8005
33
+ 127.0.0.1:8004
34
+ 127.0.0.1:8001
35
+ 127.0.0.1:8001
36
+ 127.0.0.1:8006
37
+ 127.0.0.1:8005
38
+ 127.0.0.1:8002
39
+ 127.0.0.1:8001
40
+ 127.0.0.1:8002
41
+ 127.0.0.1:8007
42
+ 127.0.0.1:8001
43
+ 127.0.0.1:8006
44
+ 127.0.0.1:8003
45
+ 127.0.0.1:8002
46
+ 127.0.0.1:8003
47
+ 127.0.0.1:8008
48
+ 127.0.0.1:8002
49
+ 127.0.0.1:8007
50
+ 127.0.0.1:8004
51
+ 127.0.0.1:8003
52
+ 127.0.0.1:8001
53
+ 127.0.0.1:8004
54
+ 127.0.0.1:8003
55
+ 127.0.0.1:8008
56
+ 127.0.0.1:8005
57
+ 127.0.0.1:8001
58
+ 127.0.0.1:8004
59
+ 127.0.0.1:8002
60
+ 127.0.0.1:8005
61
+ 127.0.0.1:8004
62
+ 127.0.0.1:8006
63
+ 127.0.0.1:8002
64
+ 127.0.0.1:8005
65
+ 127.0.0.1:8003
66
+ 127.0.0.1:8006
67
+ 127.0.0.1:8005
68
+ 127.0.0.1:8007
69
+ 127.0.0.1:8003
70
+ 127.0.0.1:8006
71
+ 127.0.0.1:8004
72
+ 127.0.0.1:8007
73
+ 127.0.0.1:8006
74
+ 127.0.0.1:8008
75
+ 127.0.0.1:8004
76
+ 127.0.0.1:8007
77
+ 127.0.0.1:8005
78
+ 127.0.0.1:8008
79
+ 127.0.0.1:8007
80
+ 127.0.0.1:8005
81
+ 127.0.0.1:8008
82
+ 127.0.0.1:8006
83
+ 127.0.0.1:8008
84
+ 127.0.0.1:8006
85
+ 127.0.0.1:8007
86
+ 127.0.0.1:8007
87
+ 127.0.0.1:8008
88
+ 127.0.0.1:8008
89
+ 127.0.0.1:8001
90
+ 127.0.0.1:8001
91
+ 127.0.0.1:8001
92
+ 127.0.0.1:8002
93
+ 127.0.0.1:8002
94
+ 127.0.0.1:8002
95
+ 127.0.0.1:8003
96
+ 127.0.0.1:8003
97
+ 127.0.0.1:8003
98
+ 127.0.0.1:8004
99
+ 127.0.0.1:8004
100
+ 127.0.0.1:8004
101
+ 127.0.0.1:8005
102
+ 127.0.0.1:8005
103
+ 127.0.0.1:8005
104
+ 127.0.0.1:8006
105
+ 127.0.0.1:8006
106
+ 127.0.0.1:8006
107
+ 127.0.0.1:8007
108
+ 127.0.0.1:8007
109
+ 127.0.0.1:8007
110
+ 127.0.0.1:8008
111
+ 127.0.0.1:8008
112
+ 127.0.0.1:8008
113
+ 127.0.0.1:8001
114
+ 127.0.0.1:8002
115
+ 127.0.0.1:8003
116
+ 127.0.0.1:8004
117
+ 127.0.0.1:8005
118
+ 127.0.0.1:8006
119
+ 127.0.0.1:8001
120
+ 127.0.0.1:8007
121
+ 127.0.0.1:8002
122
+ 127.0.0.1:8008
123
+ 127.0.0.1:8003
124
+ 127.0.0.1:8004
125
+ 127.0.0.1:8001
126
+ 127.0.0.1:8005
127
+ 127.0.0.1:8002
128
+ 127.0.0.1:8006
129
+ 127.0.0.1:8001
130
+ 127.0.0.1:8003
131
+ 127.0.0.1:8007
132
+ 127.0.0.1:8002
133
+ 127.0.0.1:8001
134
+ 127.0.0.1:8004
135
+ 127.0.0.1:8008
136
+ 127.0.0.1:8003
137
+ 127.0.0.1:8002
138
+ 127.0.0.1:8005
139
+ 127.0.0.1:8004
140
+ 127.0.0.1:8003
141
+ 127.0.0.1:8006
142
+ 127.0.0.1:8005
143
+ 127.0.0.1:8004
144
+ 127.0.0.1:8007
145
+ 127.0.0.1:8006
146
+ 127.0.0.1:8005
147
+ 127.0.0.1:8008
148
+ 127.0.0.1:8007
149
+ 127.0.0.1:8006
150
+ 127.0.0.1:8008
151
+ 127.0.0.1:8007
152
+ 127.0.0.1:8008
153
+ 127.0.0.1:8001
154
+ 127.0.0.1:8002
155
+ 127.0.0.1:8003
156
+ 127.0.0.1:8004
157
+ 127.0.0.1:8005
158
+ 127.0.0.1:8006
159
+ 127.0.0.1:8007
160
+ 127.0.0.1:8008
161
+ 127.0.0.1:8001
162
+ 127.0.0.1:8002
163
+ 127.0.0.1:8003
164
+ 127.0.0.1:8004
165
+ 127.0.0.1:8005
166
+ 127.0.0.1:8006
167
+ 127.0.0.1:8007
168
+ 127.0.0.1:8008
169
+ 127.0.0.1:8001
170
+ 127.0.0.1:8002
171
+ 127.0.0.1:8003
172
+ 127.0.0.1:8004
173
+ 127.0.0.1:8005
174
+ 127.0.0.1:8006
175
+ 127.0.0.1:8007
176
+ 127.0.0.1:8008
177
+ 127.0.0.1:8001
178
+ 127.0.0.1:8001
179
+ 127.0.0.1:8002
180
+ 127.0.0.1:8002
181
+ 127.0.0.1:8003
182
+ 127.0.0.1:8003
183
+ 127.0.0.1:8004
184
+ 127.0.0.1:8004
185
+ 127.0.0.1:8005
186
+ 127.0.0.1:8005
187
+ 127.0.0.1:8006
188
+ 127.0.0.1:8006
189
+ 127.0.0.1:8007
190
+ 127.0.0.1:8007
191
+ 127.0.0.1:8008
192
+ 127.0.0.1:8008
193
+ 127.0.0.1:8001
194
+ 127.0.0.1:8001
195
+ 127.0.0.1:8002
196
+ 127.0.0.1:8002
197
+ 127.0.0.1:8003
198
+ 127.0.0.1:8001
199
+ 127.0.0.1:8003
200
+ 127.0.0.1:8004
201
+ 127.0.0.1:8002
202
+ 127.0.0.1:8004
203
+ 127.0.0.1:8005
204
+ 127.0.0.1:8003
205
+ 127.0.0.1:8005
206
+ 127.0.0.1:8006
207
+ 127.0.0.1:8004
208
+ 127.0.0.1:8006
209
+ 127.0.0.1:8007
210
+ 127.0.0.1:8005
211
+ 127.0.0.1:8007
212
+ 127.0.0.1:8008
213
+ 127.0.0.1:8006
214
+ 127.0.0.1:8008
215
+ 127.0.0.1:8007
216
+ 127.0.0.1:8008
217
+ 127.0.0.1:8001
218
+ 127.0.0.1:8002
219
+ 127.0.0.1:8001
220
+ 127.0.0.1:8003
221
+ 127.0.0.1:8002
222
+ 127.0.0.1:8004
223
+ 127.0.0.1:8003
224
+ 127.0.0.1:8005
225
+ 127.0.0.1:8004
226
+ 127.0.0.1:8006
227
+ 127.0.0.1:8005
228
+ 127.0.0.1:8007
229
+ 127.0.0.1:8006
230
+ 127.0.0.1:8008
231
+ 127.0.0.1:8007
232
+ 127.0.0.1:8008