diff --git "a/inference_graph.json" "b/inference_graph.json" new file mode 100644--- /dev/null +++ "b/inference_graph.json" @@ -0,0 +1,30486 @@ +{ + "nodes": { + "server.config": { + "path": "server.config", + "access_count": 6, + "reads": 6, + "writes": 0, + "total_bytes": 1184 + }, + "server.config.max_tokens": { + "path": "server.config.max_tokens", + "access_count": 3, + "reads": 3, + "writes": 0, + "total_bytes": 84 + }, + "server.config.temperature": { + "path": "server.config.temperature", + "access_count": 3, + "reads": 3, + "writes": 0, + "total_bytes": 72 + }, + "server.buffer": { + "path": "server.buffer", + "access_count": 33, + "reads": 33, + "writes": 0, + "total_bytes": 6584 + }, + "server.buffer.input_ids": { + "path": "server.buffer.input_ids", + "access_count": 3, + "reads": 3, + "writes": 0, + "total_bytes": 6480 + }, + "server.layer_0": { + "path": "server.layer_0", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.layer_0.q": { + "path": "server.layer_0.q", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_0.k": { + "path": "server.layer_0.k", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_0.v": { + "path": "server.layer_0.v", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.kv_cache_0": { + "path": "server.kv_cache_0", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.kv_cache_0.keys": { + "path": "server.kv_cache_0.keys", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.kv_cache_0.values": { + "path": "server.kv_cache_0.values", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.layer_0.ffn_up": { + "path": "server.layer_0.ffn_up", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_0.ffn_down": { + "path": "server.layer_0.ffn_down", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_1": { + "path": "server.layer_1", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.layer_1.q": { + "path": "server.layer_1.q", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_1.k": { + "path": "server.layer_1.k", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_1.v": { + "path": "server.layer_1.v", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.kv_cache_1": { + "path": "server.kv_cache_1", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.kv_cache_1.keys": { + "path": "server.kv_cache_1.keys", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.kv_cache_1.values": { + "path": "server.kv_cache_1.values", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.layer_1.ffn_up": { + "path": "server.layer_1.ffn_up", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_1.ffn_down": { + "path": "server.layer_1.ffn_down", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_2": { + "path": "server.layer_2", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.layer_2.q": { + "path": "server.layer_2.q", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_2.k": { + "path": "server.layer_2.k", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_2.v": { + "path": "server.layer_2.v", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.kv_cache_2": { + "path": "server.kv_cache_2", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.kv_cache_2.keys": { + "path": "server.kv_cache_2.keys", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.kv_cache_2.values": { + "path": "server.kv_cache_2.values", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.layer_2.ffn_up": { + "path": "server.layer_2.ffn_up", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_2.ffn_down": { + "path": "server.layer_2.ffn_down", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_3": { + "path": "server.layer_3", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.layer_3.q": { + "path": "server.layer_3.q", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_3.k": { + "path": "server.layer_3.k", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_3.v": { + "path": "server.layer_3.v", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.kv_cache_3": { + "path": "server.kv_cache_3", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.kv_cache_3.keys": { + "path": "server.kv_cache_3.keys", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.kv_cache_3.values": { + "path": "server.kv_cache_3.values", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.layer_3.ffn_up": { + "path": "server.layer_3.ffn_up", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_3.ffn_down": { + "path": "server.layer_3.ffn_down", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_4": { + "path": "server.layer_4", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.layer_4.q": { + "path": "server.layer_4.q", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_4.k": { + "path": "server.layer_4.k", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_4.v": { + "path": "server.layer_4.v", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.kv_cache_4": { + "path": "server.kv_cache_4", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.kv_cache_4.keys": { + "path": "server.kv_cache_4.keys", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.kv_cache_4.values": { + "path": "server.kv_cache_4.values", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.layer_4.ffn_up": { + "path": "server.layer_4.ffn_up", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_4.ffn_down": { + "path": "server.layer_4.ffn_down", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_5": { + "path": "server.layer_5", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.layer_5.q": { + "path": "server.layer_5.q", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_5.k": { + "path": "server.layer_5.k", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.layer_5.v": { + "path": "server.layer_5.v", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 495360 + }, + "server.kv_cache_5": { + "path": "server.kv_cache_5", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 5984 + }, + "server.kv_cache_5.keys": { + "path": "server.kv_cache_5.keys", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.kv_cache_5.values": { + "path": "server.kv_cache_5.values", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3936000 + }, + "server.layer_5.ffn_up": { + "path": "server.layer_5.ffn_up", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.layer_5.ffn_down": { + "path": "server.layer_5.ffn_down", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 1969920 + }, + "server.buffer.logits": { + "path": "server.buffer.logits", + "access_count": 30, + "reads": 30, + "writes": 0, + "total_bytes": 3843360 + } + }, + "edges": [ + { + "source": "server.config", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 0.016, + "std_delta_ms": 0.012, + "weight": 1.72 + }, + { + "source": "server.config", + "target": "server.config.temperature", + "count": 6, + "mean_delta_ms": 0.019, + "std_delta_ms": 0.015, + "weight": 3.3 + }, + { + "source": "server.config", + "target": "server.buffer", + "count": 12, + "mean_delta_ms": 0.751, + "std_delta_ms": 0.729, + "weight": 6.09 + }, + { + "source": "server.config", + "target": "server.buffer.input_ids", + "count": 6, + "mean_delta_ms": 0.034, + "std_delta_ms": 0.019, + "weight": 3.87 + }, + { + "source": "server.config", + "target": "server.layer_0", + "count": 12, + "mean_delta_ms": 0.773, + "std_delta_ms": 0.729, + "weight": 6.17 + }, + { + "source": "server.config", + "target": "server.layer_0.q", + "count": 12, + "mean_delta_ms": 0.78, + "std_delta_ms": 0.728, + "weight": 6.21 + }, + { + "source": "server.config", + "target": "server.layer_0.k", + "count": 12, + "mean_delta_ms": 0.785, + "std_delta_ms": 0.727, + "weight": 6.23 + }, + { + "source": "server.config", + "target": "server.layer_0.v", + "count": 12, + "mean_delta_ms": 0.791, + "std_delta_ms": 0.727, + "weight": 6.25 + }, + { + "source": "server.config", + "target": "server.kv_cache_0", + "count": 12, + "mean_delta_ms": 0.798, + "std_delta_ms": 0.727, + "weight": 6.28 + }, + { + "source": "server.config", + "target": "server.kv_cache_0.keys", + "count": 12, + "mean_delta_ms": 0.804, + "std_delta_ms": 0.726, + "weight": 6.31 + }, + { + "source": "server.config", + "target": "server.kv_cache_0.values", + "count": 12, + "mean_delta_ms": 0.809, + "std_delta_ms": 0.725, + "weight": 6.33 + }, + { + "source": "server.config", + "target": "server.layer_0.ffn_up", + "count": 12, + "mean_delta_ms": 0.814, + "std_delta_ms": 0.725, + "weight": 6.35 + }, + { + "source": "server.config", + "target": "server.layer_0.ffn_down", + "count": 12, + "mean_delta_ms": 0.82, + "std_delta_ms": 0.725, + "weight": 6.37 + }, + { + "source": "server.config", + "target": "server.layer_1", + "count": 12, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.719, + "weight": 6.99 + }, + { + "source": "server.config", + "target": "server.layer_1.q", + "count": 12, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.718, + "weight": 7.02 + }, + { + "source": "server.config", + "target": "server.layer_1.k", + "count": 12, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.718, + "weight": 7.04 + }, + { + "source": "server.config", + "target": "server.layer_1.v", + "count": 12, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.718, + "weight": 7.05 + }, + { + "source": "server.config", + "target": "server.kv_cache_1", + "count": 12, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.721, + "weight": 7.07 + }, + { + "source": "server.config", + "target": "server.kv_cache_1.keys", + "count": 12, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.72, + "weight": 7.09 + }, + { + "source": "server.config", + "target": "server.kv_cache_1.values", + "count": 12, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.72, + "weight": 7.1 + }, + { + "source": "server.config", + "target": "server.layer_1.ffn_up", + "count": 12, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.72, + "weight": 7.12 + }, + { + "source": "server.config", + "target": "server.layer_1.ffn_down", + "count": 12, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.719, + "weight": 7.14 + }, + { + "source": "server.config", + "target": "server.layer_2", + "count": 6, + "mean_delta_ms": 0.517, + "std_delta_ms": 0.061, + "weight": 5.37 + }, + { + "source": "server.config", + "target": "server.layer_2.q", + "count": 6, + "mean_delta_ms": 0.526, + "std_delta_ms": 0.064, + "weight": 5.34 + }, + { + "source": "server.config", + "target": "server.layer_2.k", + "count": 6, + "mean_delta_ms": 0.532, + "std_delta_ms": 0.065, + "weight": 5.35 + }, + { + "source": "server.config", + "target": "server.layer_2.v", + "count": 6, + "mean_delta_ms": 0.54, + "std_delta_ms": 0.064, + "weight": 5.37 + }, + { + "source": "server.config", + "target": "server.kv_cache_2", + "count": 6, + "mean_delta_ms": 0.547, + "std_delta_ms": 0.064, + "weight": 5.37 + }, + { + "source": "server.config", + "target": "server.kv_cache_2.keys", + "count": 6, + "mean_delta_ms": 0.553, + "std_delta_ms": 0.067, + "weight": 5.36 + }, + { + "source": "server.config", + "target": "server.kv_cache_2.values", + "count": 6, + "mean_delta_ms": 0.559, + "std_delta_ms": 0.067, + "weight": 5.36 + }, + { + "source": "server.config", + "target": "server.layer_2.ffn_up", + "count": 6, + "mean_delta_ms": 0.563, + "std_delta_ms": 0.068, + "weight": 5.36 + }, + { + "source": "server.config", + "target": "server.layer_2.ffn_down", + "count": 6, + "mean_delta_ms": 0.568, + "std_delta_ms": 0.068, + "weight": 5.36 + }, + { + "source": "server.config", + "target": "server.layer_3", + "count": 6, + "mean_delta_ms": 0.757, + "std_delta_ms": 0.058, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.layer_3.q", + "count": 6, + "mean_delta_ms": 0.774, + "std_delta_ms": 0.058, + "weight": 5.58 + }, + { + "source": "server.config", + "target": "server.layer_3.k", + "count": 6, + "mean_delta_ms": 0.78, + "std_delta_ms": 0.058, + "weight": 5.59 + }, + { + "source": "server.config", + "target": "server.layer_3.v", + "count": 6, + "mean_delta_ms": 0.797, + "std_delta_ms": 0.047, + "weight": 5.67 + }, + { + "source": "server.config", + "target": "server.kv_cache_3", + "count": 6, + "mean_delta_ms": 0.805, + "std_delta_ms": 0.047, + "weight": 5.67 + }, + { + "source": "server.config", + "target": "server.kv_cache_3.keys", + "count": 6, + "mean_delta_ms": 0.812, + "std_delta_ms": 0.049, + "weight": 5.66 + }, + { + "source": "server.config", + "target": "server.kv_cache_3.values", + "count": 6, + "mean_delta_ms": 0.818, + "std_delta_ms": 0.049, + "weight": 5.66 + }, + { + "source": "server.config", + "target": "server.layer_3.ffn_up", + "count": 6, + "mean_delta_ms": 0.836, + "std_delta_ms": 0.066, + "weight": 5.56 + }, + { + "source": "server.config", + "target": "server.layer_3.ffn_down", + "count": 6, + "mean_delta_ms": 0.843, + "std_delta_ms": 0.068, + "weight": 5.55 + }, + { + "source": "server.config", + "target": "server.layer_4", + "count": 6, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.069, + "weight": 5.62 + }, + { + "source": "server.config", + "target": "server.layer_4.q", + "count": 6, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.071, + "weight": 5.61 + }, + { + "source": "server.config", + "target": "server.layer_4.k", + "count": 6, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.074, + "weight": 5.6 + }, + { + "source": "server.config", + "target": "server.layer_4.v", + "count": 6, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.074, + "weight": 5.6 + }, + { + "source": "server.config", + "target": "server.kv_cache_4", + "count": 6, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.075, + "weight": 5.6 + }, + { + "source": "server.config", + "target": "server.kv_cache_4.keys", + "count": 6, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.077, + "weight": 5.59 + }, + { + "source": "server.config", + "target": "server.kv_cache_4.values", + "count": 6, + "mean_delta_ms": 1.054, + "std_delta_ms": 0.077, + "weight": 5.59 + }, + { + "source": "server.config", + "target": "server.layer_4.ffn_up", + "count": 6, + "mean_delta_ms": 1.06, + "std_delta_ms": 0.078, + "weight": 5.59 + }, + { + "source": "server.config", + "target": "server.layer_4.ffn_down", + "count": 6, + "mean_delta_ms": 1.065, + "std_delta_ms": 0.078, + "weight": 5.59 + }, + { + "source": "server.config", + "target": "server.layer_5", + "count": 6, + "mean_delta_ms": 1.247, + "std_delta_ms": 0.09, + "weight": 5.6 + }, + { + "source": "server.config", + "target": "server.layer_5.q", + "count": 6, + "mean_delta_ms": 1.257, + "std_delta_ms": 0.095, + "weight": 5.58 + }, + { + "source": "server.config", + "target": "server.layer_5.k", + "count": 6, + "mean_delta_ms": 1.264, + "std_delta_ms": 0.096, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.layer_5.v", + "count": 6, + "mean_delta_ms": 1.269, + "std_delta_ms": 0.097, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.kv_cache_5", + "count": 6, + "mean_delta_ms": 1.276, + "std_delta_ms": 0.098, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.kv_cache_5.keys", + "count": 6, + "mean_delta_ms": 1.284, + "std_delta_ms": 0.1, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.kv_cache_5.values", + "count": 6, + "mean_delta_ms": 1.289, + "std_delta_ms": 0.1, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.layer_5.ffn_up", + "count": 6, + "mean_delta_ms": 1.295, + "std_delta_ms": 0.101, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.layer_5.ffn_down", + "count": 6, + "mean_delta_ms": 1.3, + "std_delta_ms": 0.101, + "weight": 5.57 + }, + { + "source": "server.config", + "target": "server.buffer.logits", + "count": 6, + "mean_delta_ms": 1.485, + "std_delta_ms": 0.103, + "weight": 5.61 + }, + { + "source": "server.config.max_tokens", + "target": "server.config", + "count": 3, + "mean_delta_ms": 0.007, + "std_delta_ms": 0.001, + "weight": 2.47 + }, + { + "source": "server.config.max_tokens", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 0.014, + "std_delta_ms": 0.003, + "weight": 2.48 + }, + { + "source": "server.config.max_tokens", + "target": "server.buffer", + "count": 6, + "mean_delta_ms": 0.747, + "std_delta_ms": 0.729, + "weight": 3.04 + }, + { + "source": "server.config.max_tokens", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 0.03, + "std_delta_ms": 0.009, + "weight": 2.32 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_0", + "count": 6, + "mean_delta_ms": 0.768, + "std_delta_ms": 0.729, + "weight": 3.08 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_0.q", + "count": 6, + "mean_delta_ms": 0.775, + "std_delta_ms": 0.727, + "weight": 3.1 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_0.k", + "count": 6, + "mean_delta_ms": 0.781, + "std_delta_ms": 0.727, + "weight": 3.11 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_0.v", + "count": 6, + "mean_delta_ms": 0.786, + "std_delta_ms": 0.727, + "weight": 3.12 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_0", + "count": 6, + "mean_delta_ms": 0.793, + "std_delta_ms": 0.726, + "weight": 3.13 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_0.keys", + "count": 6, + "mean_delta_ms": 0.8, + "std_delta_ms": 0.725, + "weight": 3.15 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_0.values", + "count": 6, + "mean_delta_ms": 0.805, + "std_delta_ms": 0.725, + "weight": 3.16 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_0.ffn_up", + "count": 6, + "mean_delta_ms": 0.81, + "std_delta_ms": 0.725, + "weight": 3.17 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_0.ffn_down", + "count": 6, + "mean_delta_ms": 0.815, + "std_delta_ms": 0.724, + "weight": 3.18 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_1", + "count": 6, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.719, + "weight": 3.49 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_1.q", + "count": 6, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.718, + "weight": 3.51 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_1.k", + "count": 6, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.717, + "weight": 3.51 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_1.v", + "count": 6, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.717, + "weight": 3.52 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_1", + "count": 6, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.72, + "weight": 3.53 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_1.keys", + "count": 6, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.719, + "weight": 3.54 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_1.values", + "count": 6, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.72, + "weight": 3.55 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_1.ffn_up", + "count": 6, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.72, + "weight": 3.55 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_1.ffn_down", + "count": 6, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.719, + "weight": 3.56 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_2", + "count": 3, + "mean_delta_ms": 0.513, + "std_delta_ms": 0.055, + "weight": 2.71 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_2.q", + "count": 3, + "mean_delta_ms": 0.522, + "std_delta_ms": 0.058, + "weight": 2.7 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_2.k", + "count": 3, + "mean_delta_ms": 0.527, + "std_delta_ms": 0.059, + "weight": 2.7 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_2.v", + "count": 3, + "mean_delta_ms": 0.536, + "std_delta_ms": 0.057, + "weight": 2.71 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_2", + "count": 3, + "mean_delta_ms": 0.542, + "std_delta_ms": 0.058, + "weight": 2.71 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_2.keys", + "count": 3, + "mean_delta_ms": 0.549, + "std_delta_ms": 0.06, + "weight": 2.7 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_2.values", + "count": 3, + "mean_delta_ms": 0.554, + "std_delta_ms": 0.061, + "weight": 2.7 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_2.ffn_up", + "count": 3, + "mean_delta_ms": 0.559, + "std_delta_ms": 0.061, + "weight": 2.7 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_2.ffn_down", + "count": 3, + "mean_delta_ms": 0.564, + "std_delta_ms": 0.062, + "weight": 2.7 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_3", + "count": 3, + "mean_delta_ms": 0.753, + "std_delta_ms": 0.051, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_3.q", + "count": 3, + "mean_delta_ms": 0.77, + "std_delta_ms": 0.051, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_3.k", + "count": 3, + "mean_delta_ms": 0.776, + "std_delta_ms": 0.052, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_3.v", + "count": 3, + "mean_delta_ms": 0.793, + "std_delta_ms": 0.04, + "weight": 2.86 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_3", + "count": 3, + "mean_delta_ms": 0.8, + "std_delta_ms": 0.04, + "weight": 2.86 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_3.keys", + "count": 3, + "mean_delta_ms": 0.808, + "std_delta_ms": 0.042, + "weight": 2.85 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_3.values", + "count": 3, + "mean_delta_ms": 0.814, + "std_delta_ms": 0.042, + "weight": 2.85 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_3.ffn_up", + "count": 3, + "mean_delta_ms": 0.832, + "std_delta_ms": 0.06, + "weight": 2.8 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_3.ffn_down", + "count": 3, + "mean_delta_ms": 0.838, + "std_delta_ms": 0.062, + "weight": 2.79 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_4", + "count": 3, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.062, + "weight": 2.83 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_4.q", + "count": 3, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.065, + "weight": 2.82 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_4.k", + "count": 3, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.068, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_4.v", + "count": 3, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.068, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_4", + "count": 3, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.068, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_4.keys", + "count": 3, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.071, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_4.values", + "count": 3, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.071, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_4.ffn_up", + "count": 3, + "mean_delta_ms": 1.055, + "std_delta_ms": 0.071, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_4.ffn_down", + "count": 3, + "mean_delta_ms": 1.061, + "std_delta_ms": 0.072, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_5", + "count": 3, + "mean_delta_ms": 1.243, + "std_delta_ms": 0.084, + "weight": 2.81 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_5.q", + "count": 3, + "mean_delta_ms": 1.253, + "std_delta_ms": 0.089, + "weight": 2.8 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_5.k", + "count": 3, + "mean_delta_ms": 1.26, + "std_delta_ms": 0.09, + "weight": 2.8 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_5.v", + "count": 3, + "mean_delta_ms": 1.265, + "std_delta_ms": 0.091, + "weight": 2.8 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_5", + "count": 3, + "mean_delta_ms": 1.271, + "std_delta_ms": 0.092, + "weight": 2.8 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_5.keys", + "count": 3, + "mean_delta_ms": 1.279, + "std_delta_ms": 0.094, + "weight": 2.8 + }, + { + "source": "server.config.max_tokens", + "target": "server.kv_cache_5.values", + "count": 3, + "mean_delta_ms": 1.285, + "std_delta_ms": 0.094, + "weight": 2.8 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_5.ffn_up", + "count": 3, + "mean_delta_ms": 1.29, + "std_delta_ms": 0.095, + "weight": 2.79 + }, + { + "source": "server.config.max_tokens", + "target": "server.layer_5.ffn_down", + "count": 3, + "mean_delta_ms": 1.296, + "std_delta_ms": 0.095, + "weight": 2.79 + }, + { + "source": "server.config.max_tokens", + "target": "server.buffer.logits", + "count": 3, + "mean_delta_ms": 1.48, + "std_delta_ms": 0.097, + "weight": 2.81 + }, + { + "source": "server.config.temperature", + "target": "server.buffer", + "count": 6, + "mean_delta_ms": 0.732, + "std_delta_ms": 0.728, + "weight": 3.01 + }, + { + "source": "server.config.temperature", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 0.016, + "std_delta_ms": 0.006, + "weight": 2.19 + }, + { + "source": "server.config.temperature", + "target": "server.layer_0", + "count": 6, + "mean_delta_ms": 0.754, + "std_delta_ms": 0.728, + "weight": 3.05 + }, + { + "source": "server.config.temperature", + "target": "server.layer_0.q", + "count": 6, + "mean_delta_ms": 0.761, + "std_delta_ms": 0.727, + "weight": 3.07 + }, + { + "source": "server.config.temperature", + "target": "server.layer_0.k", + "count": 6, + "mean_delta_ms": 0.766, + "std_delta_ms": 0.727, + "weight": 3.08 + }, + { + "source": "server.config.temperature", + "target": "server.layer_0.v", + "count": 6, + "mean_delta_ms": 0.772, + "std_delta_ms": 0.727, + "weight": 3.09 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_0", + "count": 6, + "mean_delta_ms": 0.779, + "std_delta_ms": 0.726, + "weight": 3.11 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_0.keys", + "count": 6, + "mean_delta_ms": 0.785, + "std_delta_ms": 0.725, + "weight": 3.12 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_0.values", + "count": 6, + "mean_delta_ms": 0.79, + "std_delta_ms": 0.725, + "weight": 3.13 + }, + { + "source": "server.config.temperature", + "target": "server.layer_0.ffn_up", + "count": 6, + "mean_delta_ms": 0.795, + "std_delta_ms": 0.724, + "weight": 3.14 + }, + { + "source": "server.config.temperature", + "target": "server.layer_0.ffn_down", + "count": 6, + "mean_delta_ms": 0.801, + "std_delta_ms": 0.724, + "weight": 3.15 + }, + { + "source": "server.config.temperature", + "target": "server.layer_1", + "count": 6, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.719, + "weight": 3.47 + }, + { + "source": "server.config.temperature", + "target": "server.layer_1.q", + "count": 6, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.717, + "weight": 3.48 + }, + { + "source": "server.config.temperature", + "target": "server.layer_1.k", + "count": 6, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.717, + "weight": 3.49 + }, + { + "source": "server.config.temperature", + "target": "server.layer_1.v", + "count": 6, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.717, + "weight": 3.5 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_1", + "count": 6, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.72, + "weight": 3.51 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_1.keys", + "count": 6, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.719, + "weight": 3.52 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_1.values", + "count": 6, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.719, + "weight": 3.53 + }, + { + "source": "server.config.temperature", + "target": "server.layer_1.ffn_up", + "count": 6, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.719, + "weight": 3.53 + }, + { + "source": "server.config.temperature", + "target": "server.layer_1.ffn_down", + "count": 6, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.718, + "weight": 3.55 + }, + { + "source": "server.config.temperature", + "target": "server.layer_2", + "count": 3, + "mean_delta_ms": 0.499, + "std_delta_ms": 0.052, + "weight": 2.72 + }, + { + "source": "server.config.temperature", + "target": "server.layer_2.q", + "count": 3, + "mean_delta_ms": 0.507, + "std_delta_ms": 0.055, + "weight": 2.71 + }, + { + "source": "server.config.temperature", + "target": "server.layer_2.k", + "count": 3, + "mean_delta_ms": 0.513, + "std_delta_ms": 0.056, + "weight": 2.71 + }, + { + "source": "server.config.temperature", + "target": "server.layer_2.v", + "count": 3, + "mean_delta_ms": 0.521, + "std_delta_ms": 0.054, + "weight": 2.72 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_2", + "count": 3, + "mean_delta_ms": 0.528, + "std_delta_ms": 0.055, + "weight": 2.72 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_2.keys", + "count": 3, + "mean_delta_ms": 0.534, + "std_delta_ms": 0.057, + "weight": 2.71 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_2.values", + "count": 3, + "mean_delta_ms": 0.54, + "std_delta_ms": 0.058, + "weight": 2.71 + }, + { + "source": "server.config.temperature", + "target": "server.layer_2.ffn_up", + "count": 3, + "mean_delta_ms": 0.544, + "std_delta_ms": 0.058, + "weight": 2.71 + }, + { + "source": "server.config.temperature", + "target": "server.layer_2.ffn_down", + "count": 3, + "mean_delta_ms": 0.55, + "std_delta_ms": 0.059, + "weight": 2.71 + }, + { + "source": "server.config.temperature", + "target": "server.layer_3", + "count": 3, + "mean_delta_ms": 0.738, + "std_delta_ms": 0.049, + "weight": 2.81 + }, + { + "source": "server.config.temperature", + "target": "server.layer_3.q", + "count": 3, + "mean_delta_ms": 0.756, + "std_delta_ms": 0.049, + "weight": 2.82 + }, + { + "source": "server.config.temperature", + "target": "server.layer_3.k", + "count": 3, + "mean_delta_ms": 0.762, + "std_delta_ms": 0.05, + "weight": 2.82 + }, + { + "source": "server.config.temperature", + "target": "server.layer_3.v", + "count": 3, + "mean_delta_ms": 0.779, + "std_delta_ms": 0.037, + "weight": 2.86 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_3", + "count": 3, + "mean_delta_ms": 0.786, + "std_delta_ms": 0.037, + "weight": 2.87 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_3.keys", + "count": 3, + "mean_delta_ms": 0.793, + "std_delta_ms": 0.039, + "weight": 2.86 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_3.values", + "count": 3, + "mean_delta_ms": 0.8, + "std_delta_ms": 0.039, + "weight": 2.86 + }, + { + "source": "server.config.temperature", + "target": "server.layer_3.ffn_up", + "count": 3, + "mean_delta_ms": 0.817, + "std_delta_ms": 0.057, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.layer_3.ffn_down", + "count": 3, + "mean_delta_ms": 0.824, + "std_delta_ms": 0.059, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.layer_4", + "count": 3, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.06, + "weight": 2.83 + }, + { + "source": "server.config.temperature", + "target": "server.layer_4.q", + "count": 3, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.062, + "weight": 2.83 + }, + { + "source": "server.config.temperature", + "target": "server.layer_4.k", + "count": 3, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.065, + "weight": 2.82 + }, + { + "source": "server.config.temperature", + "target": "server.layer_4.v", + "count": 3, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.065, + "weight": 2.82 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_4", + "count": 3, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.066, + "weight": 2.82 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_4.keys", + "count": 3, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.068, + "weight": 2.82 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_4.values", + "count": 3, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.068, + "weight": 2.81 + }, + { + "source": "server.config.temperature", + "target": "server.layer_4.ffn_up", + "count": 3, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.069, + "weight": 2.81 + }, + { + "source": "server.config.temperature", + "target": "server.layer_4.ffn_down", + "count": 3, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.069, + "weight": 2.81 + }, + { + "source": "server.config.temperature", + "target": "server.layer_5", + "count": 3, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.081, + "weight": 2.81 + }, + { + "source": "server.config.temperature", + "target": "server.layer_5.q", + "count": 3, + "mean_delta_ms": 1.239, + "std_delta_ms": 0.086, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.layer_5.k", + "count": 3, + "mean_delta_ms": 1.245, + "std_delta_ms": 0.088, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.layer_5.v", + "count": 3, + "mean_delta_ms": 1.25, + "std_delta_ms": 0.088, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_5", + "count": 3, + "mean_delta_ms": 1.257, + "std_delta_ms": 0.089, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_5.keys", + "count": 3, + "mean_delta_ms": 1.265, + "std_delta_ms": 0.091, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.kv_cache_5.values", + "count": 3, + "mean_delta_ms": 1.27, + "std_delta_ms": 0.091, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.layer_5.ffn_up", + "count": 3, + "mean_delta_ms": 1.276, + "std_delta_ms": 0.092, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.layer_5.ffn_down", + "count": 3, + "mean_delta_ms": 1.281, + "std_delta_ms": 0.093, + "weight": 2.8 + }, + { + "source": "server.config.temperature", + "target": "server.buffer.logits", + "count": 3, + "mean_delta_ms": 1.466, + "std_delta_ms": 0.095, + "weight": 2.82 + }, + { + "source": "server.buffer", + "target": "server.buffer.input_ids", + "count": 7, + "mean_delta_ms": 0.432, + "std_delta_ms": 0.646, + "weight": 2.81 + }, + { + "source": "server.buffer", + "target": "server.layer_0", + "count": 61, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.705, + "weight": 30.2 + }, + { + "source": "server.buffer", + "target": "server.layer_0.q", + "count": 61, + "mean_delta_ms": 0.697, + "std_delta_ms": 0.705, + "weight": 30.33 + }, + { + "source": "server.buffer", + "target": "server.layer_0.k", + "count": 61, + "mean_delta_ms": 0.702, + "std_delta_ms": 0.705, + "weight": 30.44 + }, + { + "source": "server.buffer", + "target": "server.layer_0.v", + "count": 61, + "mean_delta_ms": 0.71, + "std_delta_ms": 0.705, + "weight": 30.61 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_0", + "count": 61, + "mean_delta_ms": 0.717, + "std_delta_ms": 0.705, + "weight": 30.75 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_0.keys", + "count": 61, + "mean_delta_ms": 0.723, + "std_delta_ms": 0.705, + "weight": 30.89 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_0.values", + "count": 61, + "mean_delta_ms": 0.728, + "std_delta_ms": 0.705, + "weight": 30.99 + }, + { + "source": "server.buffer", + "target": "server.layer_0.ffn_up", + "count": 61, + "mean_delta_ms": 0.733, + "std_delta_ms": 0.705, + "weight": 31.09 + }, + { + "source": "server.buffer", + "target": "server.layer_0.ffn_down", + "count": 61, + "mean_delta_ms": 0.738, + "std_delta_ms": 0.705, + "weight": 31.2 + }, + { + "source": "server.buffer", + "target": "server.layer_1", + "count": 61, + "mean_delta_ms": 0.916, + "std_delta_ms": 0.704, + "weight": 34.49 + }, + { + "source": "server.buffer", + "target": "server.layer_1.q", + "count": 61, + "mean_delta_ms": 0.923, + "std_delta_ms": 0.704, + "weight": 34.61 + }, + { + "source": "server.buffer", + "target": "server.layer_1.k", + "count": 60, + "mean_delta_ms": 0.91, + "std_delta_ms": 0.696, + "weight": 34.01 + }, + { + "source": "server.buffer", + "target": "server.layer_1.v", + "count": 60, + "mean_delta_ms": 0.915, + "std_delta_ms": 0.696, + "weight": 34.09 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_1", + "count": 60, + "mean_delta_ms": 0.922, + "std_delta_ms": 0.696, + "weight": 34.2 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_1.keys", + "count": 60, + "mean_delta_ms": 0.928, + "std_delta_ms": 0.696, + "weight": 34.29 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_1.values", + "count": 60, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.696, + "weight": 34.37 + }, + { + "source": "server.buffer", + "target": "server.layer_1.ffn_up", + "count": 60, + "mean_delta_ms": 0.937, + "std_delta_ms": 0.696, + "weight": 34.44 + }, + { + "source": "server.buffer", + "target": "server.layer_1.ffn_down", + "count": 60, + "mean_delta_ms": 0.942, + "std_delta_ms": 0.695, + "weight": 34.52 + }, + { + "source": "server.buffer", + "target": "server.layer_2", + "count": 50, + "mean_delta_ms": 0.962, + "std_delta_ms": 0.628, + "weight": 30.25 + }, + { + "source": "server.buffer", + "target": "server.layer_2.q", + "count": 50, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.628, + "weight": 30.34 + }, + { + "source": "server.buffer", + "target": "server.layer_2.k", + "count": 50, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.628, + "weight": 30.4 + }, + { + "source": "server.buffer", + "target": "server.layer_2.v", + "count": 49, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.617, + "weight": 29.83 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_2", + "count": 49, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.616, + "weight": 29.9 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_2.keys", + "count": 49, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.617, + "weight": 29.97 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_2.values", + "count": 48, + "mean_delta_ms": 0.955, + "std_delta_ms": 0.605, + "weight": 29.39 + }, + { + "source": "server.buffer", + "target": "server.layer_2.ffn_up", + "count": 48, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.605, + "weight": 29.45 + }, + { + "source": "server.buffer", + "target": "server.layer_2.ffn_down", + "count": 47, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.594, + "weight": 28.95 + }, + { + "source": "server.buffer", + "target": "server.layer_3", + "count": 42, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.524, + "weight": 27.73 + }, + { + "source": "server.buffer", + "target": "server.layer_3.q", + "count": 42, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.523, + "weight": 27.82 + }, + { + "source": "server.buffer", + "target": "server.layer_3.k", + "count": 41, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.507, + "weight": 27.29 + }, + { + "source": "server.buffer", + "target": "server.layer_3.v", + "count": 41, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.506, + "weight": 27.37 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.487, + "weight": 26.87 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.487, + "weight": 26.92 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_3.values", + "count": 39, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.466, + "weight": 26.45 + }, + { + "source": "server.buffer", + "target": "server.layer_3.ffn_up", + "count": 38, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.441, + "weight": 26.05 + }, + { + "source": "server.buffer", + "target": "server.layer_3.ffn_down", + "count": 38, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.441, + "weight": 26.09 + }, + { + "source": "server.buffer", + "target": "server.layer_4", + "count": 32, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.126, + "weight": 28.32 + }, + { + "source": "server.buffer", + "target": "server.layer_4.q", + "count": 32, + "mean_delta_ms": 0.972, + "std_delta_ms": 0.127, + "weight": 28.31 + }, + { + "source": "server.buffer", + "target": "server.layer_4.k", + "count": 32, + "mean_delta_ms": 0.978, + "std_delta_ms": 0.129, + "weight": 28.28 + }, + { + "source": "server.buffer", + "target": "server.layer_4.v", + "count": 32, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.129, + "weight": 28.28 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_4", + "count": 32, + "mean_delta_ms": 0.99, + "std_delta_ms": 0.13, + "weight": 28.28 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_4.keys", + "count": 32, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.131, + "weight": 28.29 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_4.values", + "count": 32, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.132, + "weight": 28.28 + }, + { + "source": "server.buffer", + "target": "server.layer_4.ffn_up", + "count": 32, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.131, + "weight": 28.3 + }, + { + "source": "server.buffer", + "target": "server.layer_4.ffn_down", + "count": 32, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.132, + "weight": 28.31 + }, + { + "source": "server.buffer", + "target": "server.layer_5", + "count": 32, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.15, + "weight": 28.47 + }, + { + "source": "server.buffer", + "target": "server.layer_5.q", + "count": 32, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.152, + "weight": 28.44 + }, + { + "source": "server.buffer", + "target": "server.layer_5.k", + "count": 32, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.153, + "weight": 28.44 + }, + { + "source": "server.buffer", + "target": "server.layer_5.v", + "count": 32, + "mean_delta_ms": 1.225, + "std_delta_ms": 0.153, + "weight": 28.45 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_5", + "count": 32, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.153, + "weight": 28.45 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_5.keys", + "count": 32, + "mean_delta_ms": 1.237, + "std_delta_ms": 0.154, + "weight": 28.46 + }, + { + "source": "server.buffer", + "target": "server.kv_cache_5.values", + "count": 32, + "mean_delta_ms": 1.242, + "std_delta_ms": 0.154, + "weight": 28.47 + }, + { + "source": "server.buffer", + "target": "server.layer_5.ffn_up", + "count": 32, + "mean_delta_ms": 1.247, + "std_delta_ms": 0.155, + "weight": 28.47 + }, + { + "source": "server.buffer", + "target": "server.layer_5.ffn_down", + "count": 32, + "mean_delta_ms": 1.252, + "std_delta_ms": 0.155, + "weight": 28.48 + }, + { + "source": "server.buffer", + "target": "server.buffer.logits", + "count": 60, + "mean_delta_ms": 0.71, + "std_delta_ms": 0.704, + "weight": 30.13 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_0", + "count": 6, + "mean_delta_ms": 0.738, + "std_delta_ms": 0.728, + "weight": 3.02 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_0.q", + "count": 6, + "mean_delta_ms": 0.745, + "std_delta_ms": 0.727, + "weight": 3.04 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_0.k", + "count": 6, + "mean_delta_ms": 0.751, + "std_delta_ms": 0.726, + "weight": 3.05 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_0.v", + "count": 6, + "mean_delta_ms": 0.756, + "std_delta_ms": 0.726, + "weight": 3.06 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_0", + "count": 6, + "mean_delta_ms": 0.763, + "std_delta_ms": 0.726, + "weight": 3.08 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_0.keys", + "count": 6, + "mean_delta_ms": 0.77, + "std_delta_ms": 0.725, + "weight": 3.09 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_0.values", + "count": 6, + "mean_delta_ms": 0.775, + "std_delta_ms": 0.724, + "weight": 3.1 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_0.ffn_up", + "count": 6, + "mean_delta_ms": 0.78, + "std_delta_ms": 0.724, + "weight": 3.11 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_0.ffn_down", + "count": 6, + "mean_delta_ms": 0.785, + "std_delta_ms": 0.724, + "weight": 3.12 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_1", + "count": 6, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.718, + "weight": 3.45 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_1.q", + "count": 6, + "mean_delta_ms": 0.978, + "std_delta_ms": 0.717, + "weight": 3.46 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_1.k", + "count": 6, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.716, + "weight": 3.47 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_1.v", + "count": 6, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.716, + "weight": 3.48 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_1", + "count": 6, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.719, + "weight": 3.49 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_1.keys", + "count": 6, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.718, + "weight": 3.5 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_1.values", + "count": 6, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.719, + "weight": 3.51 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_1.ffn_up", + "count": 6, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.719, + "weight": 3.51 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_1.ffn_down", + "count": 6, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.718, + "weight": 3.52 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_2", + "count": 3, + "mean_delta_ms": 0.483, + "std_delta_ms": 0.046, + "weight": 2.74 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_2.q", + "count": 3, + "mean_delta_ms": 0.492, + "std_delta_ms": 0.049, + "weight": 2.73 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_2.k", + "count": 3, + "mean_delta_ms": 0.497, + "std_delta_ms": 0.05, + "weight": 2.73 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_2.v", + "count": 3, + "mean_delta_ms": 0.506, + "std_delta_ms": 0.048, + "weight": 2.74 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_2", + "count": 3, + "mean_delta_ms": 0.512, + "std_delta_ms": 0.049, + "weight": 2.74 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_2.keys", + "count": 3, + "mean_delta_ms": 0.519, + "std_delta_ms": 0.051, + "weight": 2.73 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_2.values", + "count": 3, + "mean_delta_ms": 0.524, + "std_delta_ms": 0.052, + "weight": 2.73 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_2.ffn_up", + "count": 3, + "mean_delta_ms": 0.529, + "std_delta_ms": 0.052, + "weight": 2.73 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_2.ffn_down", + "count": 3, + "mean_delta_ms": 0.534, + "std_delta_ms": 0.053, + "weight": 2.73 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_3", + "count": 3, + "mean_delta_ms": 0.723, + "std_delta_ms": 0.043, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_3.q", + "count": 3, + "mean_delta_ms": 0.74, + "std_delta_ms": 0.045, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_3.k", + "count": 3, + "mean_delta_ms": 0.746, + "std_delta_ms": 0.045, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_3.v", + "count": 3, + "mean_delta_ms": 0.763, + "std_delta_ms": 0.032, + "weight": 2.88 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_3", + "count": 3, + "mean_delta_ms": 0.77, + "std_delta_ms": 0.032, + "weight": 2.88 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_3.keys", + "count": 3, + "mean_delta_ms": 0.778, + "std_delta_ms": 0.034, + "weight": 2.87 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_3.values", + "count": 3, + "mean_delta_ms": 0.784, + "std_delta_ms": 0.034, + "weight": 2.87 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_3.ffn_up", + "count": 3, + "mean_delta_ms": 0.802, + "std_delta_ms": 0.052, + "weight": 2.82 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_3.ffn_down", + "count": 3, + "mean_delta_ms": 0.808, + "std_delta_ms": 0.053, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_4", + "count": 3, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.054, + "weight": 2.84 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_4.q", + "count": 3, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.057, + "weight": 2.84 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_4.k", + "count": 3, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.06, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_4.v", + "count": 3, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.06, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_4", + "count": 3, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.06, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_4.keys", + "count": 3, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.062, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_4.values", + "count": 3, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.063, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_4.ffn_up", + "count": 3, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.063, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_4.ffn_down", + "count": 3, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.064, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_5", + "count": 3, + "mean_delta_ms": 1.213, + "std_delta_ms": 0.076, + "weight": 2.82 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_5.q", + "count": 3, + "mean_delta_ms": 1.223, + "std_delta_ms": 0.081, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_5.k", + "count": 3, + "mean_delta_ms": 1.229, + "std_delta_ms": 0.082, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_5.v", + "count": 3, + "mean_delta_ms": 1.235, + "std_delta_ms": 0.083, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_5", + "count": 3, + "mean_delta_ms": 1.241, + "std_delta_ms": 0.084, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_5.keys", + "count": 3, + "mean_delta_ms": 1.249, + "std_delta_ms": 0.085, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.kv_cache_5.values", + "count": 3, + "mean_delta_ms": 1.255, + "std_delta_ms": 0.086, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_5.ffn_up", + "count": 3, + "mean_delta_ms": 1.26, + "std_delta_ms": 0.087, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.layer_5.ffn_down", + "count": 3, + "mean_delta_ms": 1.266, + "std_delta_ms": 0.087, + "weight": 2.81 + }, + { + "source": "server.buffer.input_ids", + "target": "server.buffer", + "count": 3, + "mean_delta_ms": 1.442, + "std_delta_ms": 0.089, + "weight": 2.83 + }, + { + "source": "server.buffer.input_ids", + "target": "server.buffer.logits", + "count": 3, + "mean_delta_ms": 1.45, + "std_delta_ms": 0.089, + "weight": 2.83 + }, + { + "source": "server.layer_0", + "target": "server.layer_0.q", + "count": 57, + "mean_delta_ms": 0.668, + "std_delta_ms": 0.704, + "weight": 27.76 + }, + { + "source": "server.layer_0", + "target": "server.layer_0.k", + "count": 57, + "mean_delta_ms": 0.673, + "std_delta_ms": 0.704, + "weight": 27.86 + }, + { + "source": "server.layer_0", + "target": "server.layer_0.v", + "count": 57, + "mean_delta_ms": 0.682, + "std_delta_ms": 0.705, + "weight": 28.03 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_0", + "count": 57, + "mean_delta_ms": 0.689, + "std_delta_ms": 0.705, + "weight": 28.17 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_0.keys", + "count": 57, + "mean_delta_ms": 0.695, + "std_delta_ms": 0.704, + "weight": 28.3 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_0.values", + "count": 57, + "mean_delta_ms": 0.699, + "std_delta_ms": 0.704, + "weight": 28.4 + }, + { + "source": "server.layer_0", + "target": "server.layer_0.ffn_up", + "count": 57, + "mean_delta_ms": 0.704, + "std_delta_ms": 0.704, + "weight": 28.5 + }, + { + "source": "server.layer_0", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.709, + "std_delta_ms": 0.704, + "weight": 28.6 + }, + { + "source": "server.layer_0", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.703, + "weight": 31.8 + }, + { + "source": "server.layer_0", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.703, + "weight": 31.91 + }, + { + "source": "server.layer_0", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.9, + "std_delta_ms": 0.703, + "weight": 32.0 + }, + { + "source": "server.layer_0", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.905, + "std_delta_ms": 0.703, + "weight": 32.08 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_1", + "count": 56, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.694, + "weight": 31.5 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_1.keys", + "count": 56, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.694, + "weight": 31.59 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_1.values", + "count": 56, + "mean_delta_ms": 0.903, + "std_delta_ms": 0.694, + "weight": 31.66 + }, + { + "source": "server.layer_0", + "target": "server.layer_1.ffn_up", + "count": 56, + "mean_delta_ms": 0.908, + "std_delta_ms": 0.694, + "weight": 31.74 + }, + { + "source": "server.layer_0", + "target": "server.layer_1.ffn_down", + "count": 56, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.694, + "weight": 31.82 + }, + { + "source": "server.layer_0", + "target": "server.layer_2", + "count": 50, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.652, + "weight": 30.25 + }, + { + "source": "server.layer_0", + "target": "server.layer_2.q", + "count": 50, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.651, + "weight": 30.34 + }, + { + "source": "server.layer_0", + "target": "server.layer_2.k", + "count": 49, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.642, + "weight": 29.73 + }, + { + "source": "server.layer_0", + "target": "server.layer_2.v", + "count": 49, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.642, + "weight": 29.79 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_2", + "count": 47, + "mean_delta_ms": 0.96, + "std_delta_ms": 0.621, + "weight": 28.54 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_2.keys", + "count": 47, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.621, + "weight": 28.6 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_2.values", + "count": 47, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.621, + "weight": 28.66 + }, + { + "source": "server.layer_0", + "target": "server.layer_2.ffn_up", + "count": 47, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.621, + "weight": 28.72 + }, + { + "source": "server.layer_0", + "target": "server.layer_2.ffn_down", + "count": 46, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.611, + "weight": 28.2 + }, + { + "source": "server.layer_0", + "target": "server.layer_3", + "count": 40, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.535, + "weight": 26.15 + }, + { + "source": "server.layer_0", + "target": "server.layer_3.q", + "count": 40, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.534, + "weight": 26.22 + }, + { + "source": "server.layer_0", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.535, + "weight": 26.27 + }, + { + "source": "server.layer_0", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.534, + "weight": 26.34 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_3", + "count": 39, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.517, + "weight": 25.8 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_3.keys", + "count": 39, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.517, + "weight": 25.84 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_3.values", + "count": 39, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.517, + "weight": 25.89 + }, + { + "source": "server.layer_0", + "target": "server.layer_3.ffn_up", + "count": 38, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.498, + "weight": 25.37 + }, + { + "source": "server.layer_0", + "target": "server.layer_3.ffn_down", + "count": 38, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.498, + "weight": 25.42 + }, + { + "source": "server.layer_0", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.125, + "weight": 26.49 + }, + { + "source": "server.layer_0", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.947, + "std_delta_ms": 0.126, + "weight": 26.48 + }, + { + "source": "server.layer_0", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.128, + "weight": 26.44 + }, + { + "source": "server.layer_0", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.958, + "std_delta_ms": 0.129, + "weight": 26.45 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.129, + "weight": 26.47 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.129, + "weight": 26.47 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.976, + "std_delta_ms": 0.13, + "weight": 26.47 + }, + { + "source": "server.layer_0", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.13, + "weight": 26.49 + }, + { + "source": "server.layer_0", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.13, + "weight": 26.5 + }, + { + "source": "server.layer_0", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.182, + "std_delta_ms": 0.151, + "weight": 26.61 + }, + { + "source": "server.layer_0", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.153, + "weight": 26.58 + }, + { + "source": "server.layer_0", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.196, + "std_delta_ms": 0.154, + "weight": 26.58 + }, + { + "source": "server.layer_0", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.154, + "weight": 26.59 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.207, + "std_delta_ms": 0.155, + "weight": 26.6 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.213, + "std_delta_ms": 0.155, + "weight": 26.6 + }, + { + "source": "server.layer_0", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.218, + "std_delta_ms": 0.155, + "weight": 26.61 + }, + { + "source": "server.layer_0", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.223, + "std_delta_ms": 0.156, + "weight": 26.61 + }, + { + "source": "server.layer_0", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.156, + "weight": 26.62 + }, + { + "source": "server.layer_0", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.397, + "std_delta_ms": 0.163, + "weight": 27.75 + }, + { + "source": "server.layer_0", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.381, + "std_delta_ms": 0.128, + "weight": 25.62 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_0.k", + "count": 57, + "mean_delta_ms": 0.667, + "std_delta_ms": 0.704, + "weight": 27.74 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_0.v", + "count": 57, + "mean_delta_ms": 0.676, + "std_delta_ms": 0.705, + "weight": 27.9 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_0", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.705, + "weight": 28.05 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_0.keys", + "count": 57, + "mean_delta_ms": 0.689, + "std_delta_ms": 0.704, + "weight": 28.18 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_0.values", + "count": 57, + "mean_delta_ms": 0.693, + "std_delta_ms": 0.704, + "weight": 28.28 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_0.ffn_up", + "count": 57, + "mean_delta_ms": 0.698, + "std_delta_ms": 0.704, + "weight": 28.38 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.703, + "std_delta_ms": 0.704, + "weight": 28.48 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.703, + "weight": 31.71 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.703, + "weight": 31.82 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.703, + "weight": 31.91 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.899, + "std_delta_ms": 0.703, + "weight": 31.99 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.703, + "weight": 32.1 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.911, + "std_delta_ms": 0.703, + "weight": 32.18 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_1.values", + "count": 56, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.694, + "weight": 31.57 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_1.ffn_up", + "count": 56, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.694, + "weight": 31.65 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_1.ffn_down", + "count": 56, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.694, + "weight": 31.73 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_2", + "count": 50, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.652, + "weight": 30.18 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_2.q", + "count": 50, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.652, + "weight": 30.27 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_2.k", + "count": 50, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.651, + "weight": 30.34 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_2.v", + "count": 50, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.651, + "weight": 30.4 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_2", + "count": 49, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.642, + "weight": 29.8 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_2.keys", + "count": 47, + "mean_delta_ms": 0.96, + "std_delta_ms": 0.621, + "weight": 28.53 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_2.values", + "count": 47, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.621, + "weight": 28.59 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_2.ffn_up", + "count": 47, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.621, + "weight": 28.65 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_2.ffn_down", + "count": 46, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.611, + "weight": 28.13 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_3", + "count": 40, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.535, + "weight": 26.09 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_3.q", + "count": 40, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.535, + "weight": 26.17 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.535, + "weight": 26.22 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.534, + "weight": 26.28 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.534, + "weight": 26.34 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_3.keys", + "count": 39, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.517, + "weight": 25.79 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_3.values", + "count": 39, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.517, + "weight": 25.84 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_3.ffn_up", + "count": 39, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.517, + "weight": 25.89 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_3.ffn_down", + "count": 38, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.498, + "weight": 25.37 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.124, + "weight": 26.48 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.125, + "weight": 26.47 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.947, + "std_delta_ms": 0.128, + "weight": 26.44 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.128, + "weight": 26.44 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.128, + "weight": 26.46 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.129, + "weight": 26.47 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.13, + "weight": 26.46 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.13, + "weight": 26.48 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.13, + "weight": 26.49 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.15, + "weight": 26.6 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.153, + "weight": 26.57 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.153, + "weight": 26.58 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.154, + "weight": 26.58 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.154, + "weight": 26.59 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.207, + "std_delta_ms": 0.154, + "weight": 26.6 + }, + { + "source": "server.layer_0.q", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.155, + "weight": 26.6 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.217, + "std_delta_ms": 0.155, + "weight": 26.61 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.222, + "std_delta_ms": 0.156, + "weight": 26.61 + }, + { + "source": "server.layer_0.q", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.391, + "std_delta_ms": 0.163, + "weight": 27.75 + }, + { + "source": "server.layer_0.q", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.376, + "std_delta_ms": 0.128, + "weight": 25.62 + }, + { + "source": "server.layer_0.q", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.393, + "std_delta_ms": 0.131, + "weight": 24.68 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_0.v", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.705, + "weight": 27.8 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_0", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.705, + "weight": 27.94 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_0.keys", + "count": 57, + "mean_delta_ms": 0.684, + "std_delta_ms": 0.704, + "weight": 28.08 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_0.values", + "count": 57, + "mean_delta_ms": 0.688, + "std_delta_ms": 0.704, + "weight": 28.18 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_0.ffn_up", + "count": 57, + "mean_delta_ms": 0.693, + "std_delta_ms": 0.704, + "weight": 28.28 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.698, + "std_delta_ms": 0.704, + "weight": 28.38 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.703, + "weight": 31.63 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.703, + "weight": 31.74 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.703, + "weight": 31.83 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.703, + "weight": 31.91 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.703, + "weight": 32.02 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.703, + "weight": 32.1 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_1.values", + "count": 56, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.694, + "weight": 31.5 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_1.ffn_up", + "count": 56, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.694, + "weight": 31.57 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_1.ffn_down", + "count": 56, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.694, + "weight": 31.65 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_2", + "count": 50, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.652, + "weight": 30.12 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_2.q", + "count": 50, + "mean_delta_ms": 0.995, + "std_delta_ms": 0.652, + "weight": 30.21 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_2.k", + "count": 50, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.651, + "weight": 30.28 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_2.v", + "count": 50, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.651, + "weight": 30.34 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_2", + "count": 49, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.642, + "weight": 29.74 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_2.keys", + "count": 49, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.642, + "weight": 29.8 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_2.values", + "count": 47, + "mean_delta_ms": 0.96, + "std_delta_ms": 0.621, + "weight": 28.53 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_2.ffn_up", + "count": 47, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.621, + "weight": 28.59 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_2.ffn_down", + "count": 46, + "mean_delta_ms": 0.958, + "std_delta_ms": 0.611, + "weight": 28.08 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_3", + "count": 40, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.535, + "weight": 26.04 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_3.q", + "count": 40, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.535, + "weight": 26.12 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.535, + "weight": 26.17 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.534, + "weight": 26.24 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.534, + "weight": 26.3 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.534, + "weight": 26.34 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_3.values", + "count": 39, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.517, + "weight": 25.79 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_3.ffn_up", + "count": 39, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.517, + "weight": 25.85 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_3.ffn_down", + "count": 39, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.517, + "weight": 25.9 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.124, + "weight": 26.47 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.936, + "std_delta_ms": 0.125, + "weight": 26.46 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.942, + "std_delta_ms": 0.127, + "weight": 26.43 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.947, + "std_delta_ms": 0.128, + "weight": 26.43 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.954, + "std_delta_ms": 0.128, + "weight": 26.45 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.128, + "weight": 26.46 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.129, + "weight": 26.45 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.129, + "weight": 26.47 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.13, + "weight": 26.48 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.15, + "weight": 26.6 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.18, + "std_delta_ms": 0.153, + "weight": 26.56 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.153, + "weight": 26.57 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.153, + "weight": 26.58 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.196, + "std_delta_ms": 0.154, + "weight": 26.58 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.154, + "weight": 26.59 + }, + { + "source": "server.layer_0.k", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.207, + "std_delta_ms": 0.155, + "weight": 26.59 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.155, + "weight": 26.6 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.217, + "std_delta_ms": 0.155, + "weight": 26.6 + }, + { + "source": "server.layer_0.k", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.163, + "weight": 27.75 + }, + { + "source": "server.layer_0.k", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.127, + "weight": 25.62 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.131, + "weight": 24.68 + }, + { + "source": "server.layer_0.k", + "target": "server.layer_0.q", + "count": 27, + "mean_delta_ms": 1.393, + "std_delta_ms": 0.131, + "weight": 24.68 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_0", + "count": 57, + "mean_delta_ms": 0.669, + "std_delta_ms": 0.704, + "weight": 27.77 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_0.keys", + "count": 57, + "mean_delta_ms": 0.675, + "std_delta_ms": 0.704, + "weight": 27.91 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_0.values", + "count": 57, + "mean_delta_ms": 0.68, + "std_delta_ms": 0.704, + "weight": 28.01 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_0.ffn_up", + "count": 57, + "mean_delta_ms": 0.685, + "std_delta_ms": 0.704, + "weight": 28.11 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.69, + "std_delta_ms": 0.704, + "weight": 28.22 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.868, + "std_delta_ms": 0.703, + "weight": 31.5 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.875, + "std_delta_ms": 0.702, + "weight": 31.62 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.702, + "weight": 31.7 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.79 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.702, + "weight": 31.89 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.702, + "weight": 31.98 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.903, + "std_delta_ms": 0.702, + "weight": 32.06 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_1.ffn_up", + "count": 56, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.693, + "weight": 31.45 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_1.ffn_down", + "count": 56, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.693, + "weight": 31.53 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_2", + "count": 51, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.66, + "weight": 30.71 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_2.q", + "count": 51, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.66, + "weight": 30.8 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_2.k", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.66, + "weight": 30.87 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_2.v", + "count": 51, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.66, + "weight": 30.93 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_2", + "count": 51, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.66, + "weight": 31.01 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_2.keys", + "count": 50, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.651, + "weight": 30.39 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_2.values", + "count": 50, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.651, + "weight": 30.45 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_2.ffn_up", + "count": 49, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.642, + "weight": 29.82 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_2.ffn_down", + "count": 47, + "mean_delta_ms": 0.972, + "std_delta_ms": 0.624, + "weight": 28.63 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_3", + "count": 40, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.536, + "weight": 25.96 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_3.q", + "count": 40, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.536, + "weight": 26.04 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.536, + "weight": 26.09 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.535, + "weight": 26.15 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.535, + "weight": 26.21 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.535, + "weight": 26.26 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_3.values", + "count": 40, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.535, + "weight": 26.31 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_3.ffn_up", + "count": 39, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.518, + "weight": 25.77 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_3.ffn_down", + "count": 39, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.518, + "weight": 25.81 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.122, + "weight": 26.5 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.927, + "std_delta_ms": 0.123, + "weight": 26.49 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.125, + "weight": 26.46 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.125, + "weight": 26.46 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.945, + "std_delta_ms": 0.126, + "weight": 26.48 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.126, + "weight": 26.49 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.127, + "weight": 26.48 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.962, + "std_delta_ms": 0.127, + "weight": 26.5 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.967, + "std_delta_ms": 0.127, + "weight": 26.51 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.162, + "std_delta_ms": 0.148, + "weight": 26.61 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.151, + "weight": 26.57 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.177, + "std_delta_ms": 0.152, + "weight": 26.58 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.182, + "std_delta_ms": 0.152, + "weight": 26.58 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.152, + "weight": 26.59 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.194, + "std_delta_ms": 0.153, + "weight": 26.6 + }, + { + "source": "server.layer_0.v", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.199, + "std_delta_ms": 0.153, + "weight": 26.6 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.153, + "weight": 26.61 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.154, + "weight": 26.61 + }, + { + "source": "server.layer_0.v", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.378, + "std_delta_ms": 0.161, + "weight": 27.75 + }, + { + "source": "server.layer_0.v", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.362, + "std_delta_ms": 0.125, + "weight": 25.65 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.128, + "weight": 24.71 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_0.q", + "count": 27, + "mean_delta_ms": 1.385, + "std_delta_ms": 0.128, + "weight": 24.71 + }, + { + "source": "server.layer_0.v", + "target": "server.layer_0.k", + "count": 27, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.128, + "weight": 24.72 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_0.keys", + "count": 57, + "mean_delta_ms": 0.668, + "std_delta_ms": 0.704, + "weight": 27.76 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_0.values", + "count": 57, + "mean_delta_ms": 0.673, + "std_delta_ms": 0.704, + "weight": 27.86 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_0.ffn_up", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.704, + "weight": 27.96 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.703, + "weight": 28.07 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.861, + "std_delta_ms": 0.702, + "weight": 31.39 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.868, + "std_delta_ms": 0.702, + "weight": 31.51 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.873, + "std_delta_ms": 0.702, + "weight": 31.59 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.702, + "weight": 31.68 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.79 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.702, + "weight": 31.87 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.702, + "weight": 31.95 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.9, + "std_delta_ms": 0.702, + "weight": 32.03 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.702, + "weight": 32.11 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_2", + "count": 51, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.66, + "weight": 30.63 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_2.q", + "count": 51, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.66, + "weight": 30.72 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_2.k", + "count": 51, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.66, + "weight": 30.78 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_2.v", + "count": 51, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.66, + "weight": 30.85 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_2", + "count": 51, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.66, + "weight": 30.93 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_2.keys", + "count": 51, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.66, + "weight": 30.99 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_2.values", + "count": 50, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.651, + "weight": 30.37 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_2.ffn_up", + "count": 50, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.651, + "weight": 30.42 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_2.ffn_down", + "count": 49, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.645, + "weight": 29.88 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_3", + "count": 40, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.537, + "weight": 25.89 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_3.q", + "count": 40, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.536, + "weight": 25.97 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.536, + "weight": 26.02 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.536, + "weight": 26.08 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.535, + "weight": 26.14 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.535, + "weight": 26.19 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_3.values", + "count": 40, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.535, + "weight": 26.24 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_3.ffn_up", + "count": 40, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.535, + "weight": 26.3 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_3.ffn_down", + "count": 39, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.518, + "weight": 25.75 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.121, + "weight": 26.48 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.123, + "weight": 26.48 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.926, + "std_delta_ms": 0.125, + "weight": 26.44 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.931, + "std_delta_ms": 0.125, + "weight": 26.44 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.125, + "weight": 26.46 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.944, + "std_delta_ms": 0.126, + "weight": 26.47 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.949, + "std_delta_ms": 0.127, + "weight": 26.47 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.955, + "std_delta_ms": 0.127, + "weight": 26.48 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.96, + "std_delta_ms": 0.127, + "weight": 26.49 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.155, + "std_delta_ms": 0.148, + "weight": 26.59 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.164, + "std_delta_ms": 0.151, + "weight": 26.56 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.17, + "std_delta_ms": 0.152, + "weight": 26.56 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.152, + "weight": 26.57 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.152, + "weight": 26.57 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.187, + "std_delta_ms": 0.153, + "weight": 26.58 + }, + { + "source": "server.kv_cache_0", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.153, + "weight": 26.59 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.153, + "weight": 26.59 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.154, + "weight": 26.6 + }, + { + "source": "server.kv_cache_0", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.161, + "weight": 27.74 + }, + { + "source": "server.kv_cache_0", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.355, + "std_delta_ms": 0.124, + "weight": 25.65 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.372, + "std_delta_ms": 0.128, + "weight": 24.7 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_0.q", + "count": 27, + "mean_delta_ms": 1.377, + "std_delta_ms": 0.128, + "weight": 24.7 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_0.k", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.128, + "weight": 24.71 + }, + { + "source": "server.kv_cache_0", + "target": "server.layer_0.v", + "count": 27, + "mean_delta_ms": 1.391, + "std_delta_ms": 0.134, + "weight": 24.62 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_0.values", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.703, + "weight": 27.73 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_0.ffn_up", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.703, + "weight": 27.83 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.676, + "std_delta_ms": 0.703, + "weight": 27.94 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.854, + "std_delta_ms": 0.702, + "weight": 31.29 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.861, + "std_delta_ms": 0.702, + "weight": 31.41 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.867, + "std_delta_ms": 0.702, + "weight": 31.49 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.702, + "weight": 31.58 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.69 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.702, + "weight": 31.85 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.702, + "weight": 31.93 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.899, + "std_delta_ms": 0.702, + "weight": 32.01 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_2", + "count": 52, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.668, + "weight": 31.23 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_2.q", + "count": 51, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.66, + "weight": 30.64 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_2.k", + "count": 51, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.66, + "weight": 30.7 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_2.v", + "count": 51, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.66, + "weight": 30.77 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_2", + "count": 51, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.66, + "weight": 30.85 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_2.keys", + "count": 51, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.66, + "weight": 30.92 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_2.values", + "count": 51, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.66, + "weight": 30.98 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_2.ffn_up", + "count": 50, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.652, + "weight": 30.35 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_2.ffn_down", + "count": 49, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.645, + "weight": 29.81 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_3", + "count": 42, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.567, + "weight": 27.05 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_3.q", + "count": 40, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.536, + "weight": 25.91 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.536, + "weight": 25.96 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.536, + "weight": 26.02 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.536, + "weight": 26.08 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.535, + "weight": 26.13 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_3.values", + "count": 40, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.535, + "weight": 26.18 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_3.ffn_up", + "count": 40, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.535, + "weight": 26.24 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_3.ffn_down", + "count": 40, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.535, + "weight": 26.29 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.119, + "weight": 26.51 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.914, + "std_delta_ms": 0.12, + "weight": 26.51 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.123, + "weight": 26.47 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.123, + "weight": 26.47 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.932, + "std_delta_ms": 0.123, + "weight": 26.49 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.937, + "std_delta_ms": 0.124, + "weight": 26.5 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.943, + "std_delta_ms": 0.125, + "weight": 26.49 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.125, + "weight": 26.51 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.125, + "weight": 26.52 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.149, + "std_delta_ms": 0.147, + "weight": 26.6 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.158, + "std_delta_ms": 0.149, + "weight": 26.57 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.163, + "std_delta_ms": 0.15, + "weight": 26.57 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.168, + "std_delta_ms": 0.15, + "weight": 26.58 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.151, + "weight": 26.59 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.18, + "std_delta_ms": 0.151, + "weight": 26.59 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.152, + "weight": 26.6 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.152, + "weight": 26.61 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.152, + "weight": 26.61 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.364, + "std_delta_ms": 0.159, + "weight": 27.76 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.348, + "std_delta_ms": 0.122, + "weight": 25.68 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.365, + "std_delta_ms": 0.125, + "weight": 24.73 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_0.q", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.126, + "weight": 24.73 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_0.k", + "count": 27, + "mean_delta_ms": 1.376, + "std_delta_ms": 0.126, + "weight": 24.74 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.layer_0.v", + "count": 27, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.132, + "weight": 24.65 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.kv_cache_0", + "count": 27, + "mean_delta_ms": 1.391, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_0.ffn_up", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.703, + "weight": 27.73 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.703, + "weight": 27.84 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.849, + "std_delta_ms": 0.702, + "weight": 31.21 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.856, + "std_delta_ms": 0.702, + "weight": 31.33 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.862, + "std_delta_ms": 0.702, + "weight": 31.42 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.867, + "std_delta_ms": 0.702, + "weight": 31.5 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.61 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.7 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.702, + "weight": 31.85 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.702, + "weight": 31.94 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_2", + "count": 53, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.676, + "weight": 31.87 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_2.q", + "count": 52, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.668, + "weight": 31.27 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_2.k", + "count": 52, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.668, + "weight": 31.33 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_2.v", + "count": 51, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.66, + "weight": 30.71 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_2", + "count": 51, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.66, + "weight": 30.79 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_2.keys", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.66, + "weight": 30.86 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_2.values", + "count": 51, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.66, + "weight": 30.92 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_2.ffn_up", + "count": 51, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.66, + "weight": 30.98 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_2.ffn_down", + "count": 49, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.645, + "weight": 29.75 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_3", + "count": 44, + "mean_delta_ms": 1.066, + "std_delta_ms": 0.59, + "weight": 28.32 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_3.q", + "count": 41, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.553, + "weight": 26.46 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 0.987, + "std_delta_ms": 0.537, + "weight": 25.91 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.536, + "weight": 25.98 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.536, + "weight": 26.04 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.536, + "weight": 26.09 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_3.values", + "count": 40, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.535, + "weight": 26.14 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_3.ffn_up", + "count": 40, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.535, + "weight": 26.2 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_3.ffn_down", + "count": 40, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.535, + "weight": 26.24 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.119, + "weight": 26.51 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.909, + "std_delta_ms": 0.12, + "weight": 26.5 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.915, + "std_delta_ms": 0.122, + "weight": 26.46 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.123, + "weight": 26.47 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.927, + "std_delta_ms": 0.123, + "weight": 26.49 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.123, + "weight": 26.49 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.124, + "weight": 26.49 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.943, + "std_delta_ms": 0.124, + "weight": 26.51 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.125, + "weight": 26.52 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.144, + "std_delta_ms": 0.146, + "weight": 26.6 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.153, + "std_delta_ms": 0.149, + "weight": 26.56 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.159, + "std_delta_ms": 0.15, + "weight": 26.57 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.163, + "std_delta_ms": 0.15, + "weight": 26.58 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.17, + "std_delta_ms": 0.15, + "weight": 26.58 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.151, + "weight": 26.59 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.151, + "weight": 26.59 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.151, + "weight": 26.6 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.152, + "weight": 26.6 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.359, + "std_delta_ms": 0.159, + "weight": 27.75 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.343, + "std_delta_ms": 0.121, + "weight": 25.68 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.36, + "std_delta_ms": 0.125, + "weight": 24.73 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_0.q", + "count": 27, + "mean_delta_ms": 1.366, + "std_delta_ms": 0.125, + "weight": 24.73 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_0.k", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.125, + "weight": 24.74 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.layer_0.v", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.132, + "weight": 24.65 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_0", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.kv_cache_0.keys", + "count": 27, + "mean_delta_ms": 1.392, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_0.ffn_down", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.703, + "weight": 27.74 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.845, + "std_delta_ms": 0.702, + "weight": 31.13 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.852, + "std_delta_ms": 0.702, + "weight": 31.25 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.857, + "std_delta_ms": 0.702, + "weight": 31.34 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.862, + "std_delta_ms": 0.702, + "weight": 31.42 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.702, + "weight": 31.53 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.62 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.7 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.702, + "weight": 31.86 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_2", + "count": 54, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.682, + "weight": 32.51 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_2.q", + "count": 53, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.676, + "weight": 31.9 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_2.k", + "count": 52, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.668, + "weight": 31.27 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_2.v", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.668, + "weight": 31.34 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_2", + "count": 51, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.66, + "weight": 30.73 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_2.keys", + "count": 51, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.66, + "weight": 30.8 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_2.values", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.66, + "weight": 30.86 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_2.ffn_up", + "count": 51, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.66, + "weight": 30.92 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_2.ffn_down", + "count": 50, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.654, + "weight": 30.37 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_3", + "count": 44, + "mean_delta_ms": 1.062, + "std_delta_ms": 0.59, + "weight": 28.28 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_3.q", + "count": 43, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.579, + "weight": 27.69 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_3.k", + "count": 40, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.537, + "weight": 25.86 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_3.v", + "count": 40, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.536, + "weight": 25.93 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.536, + "weight": 25.99 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.536, + "weight": 26.04 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_3.values", + "count": 40, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.536, + "weight": 26.09 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_3.ffn_up", + "count": 40, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.535, + "weight": 26.15 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_3.ffn_down", + "count": 40, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.535, + "weight": 26.2 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.119, + "weight": 26.49 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.904, + "std_delta_ms": 0.12, + "weight": 26.49 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.91, + "std_delta_ms": 0.122, + "weight": 26.45 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.915, + "std_delta_ms": 0.123, + "weight": 26.45 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.922, + "std_delta_ms": 0.123, + "weight": 26.47 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.928, + "std_delta_ms": 0.123, + "weight": 26.48 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.124, + "weight": 26.48 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.124, + "weight": 26.49 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.944, + "std_delta_ms": 0.124, + "weight": 26.5 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.139, + "std_delta_ms": 0.146, + "weight": 26.59 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.148, + "std_delta_ms": 0.149, + "weight": 26.55 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.154, + "std_delta_ms": 0.15, + "weight": 26.56 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.159, + "std_delta_ms": 0.15, + "weight": 26.57 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.165, + "std_delta_ms": 0.15, + "weight": 26.57 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.151, + "weight": 26.58 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.151, + "weight": 26.58 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.151, + "weight": 26.59 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.152, + "weight": 26.59 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.355, + "std_delta_ms": 0.159, + "weight": 27.74 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.339, + "std_delta_ms": 0.121, + "weight": 25.68 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.356, + "std_delta_ms": 0.125, + "weight": 24.72 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_0.q", + "count": 27, + "mean_delta_ms": 1.361, + "std_delta_ms": 0.125, + "weight": 24.73 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_0.k", + "count": 27, + "mean_delta_ms": 1.366, + "std_delta_ms": 0.125, + "weight": 24.73 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.layer_0.v", + "count": 27, + "mean_delta_ms": 1.375, + "std_delta_ms": 0.131, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_0", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_0.keys", + "count": 27, + "mean_delta_ms": 1.387, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.kv_cache_0.values", + "count": 27, + "mean_delta_ms": 1.392, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_1", + "count": 57, + "mean_delta_ms": 0.84, + "std_delta_ms": 0.702, + "weight": 31.04 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.846, + "std_delta_ms": 0.702, + "weight": 31.16 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.852, + "std_delta_ms": 0.702, + "weight": 31.25 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.857, + "std_delta_ms": 0.702, + "weight": 31.34 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.864, + "std_delta_ms": 0.702, + "weight": 31.45 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.702, + "weight": 31.54 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.62 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.7 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_2", + "count": 54, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.682, + "weight": 32.45 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_2.q", + "count": 53, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.675, + "weight": 31.84 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_2.k", + "count": 53, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.675, + "weight": 31.91 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_2.v", + "count": 52, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.668, + "weight": 31.28 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_2", + "count": 52, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.668, + "weight": 31.36 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_2.keys", + "count": 51, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.66, + "weight": 30.74 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_2.values", + "count": 51, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.66, + "weight": 30.8 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_2.ffn_up", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.66, + "weight": 30.86 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_2.ffn_down", + "count": 50, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.654, + "weight": 30.31 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_3", + "count": 44, + "mean_delta_ms": 1.057, + "std_delta_ms": 0.591, + "weight": 28.23 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_3.q", + "count": 44, + "mean_delta_ms": 1.064, + "std_delta_ms": 0.59, + "weight": 28.31 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_3.k", + "count": 43, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.579, + "weight": 27.7 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_3.v", + "count": 41, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.552, + "weight": 26.49 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_3", + "count": 40, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.536, + "weight": 25.95 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_3.keys", + "count": 40, + "mean_delta_ms": 0.995, + "std_delta_ms": 0.536, + "weight": 26.0 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_3.values", + "count": 40, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.536, + "weight": 26.04 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_3.ffn_up", + "count": 40, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.535, + "weight": 26.11 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_3.ffn_down", + "count": 40, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.535, + "weight": 26.15 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_4", + "count": 30, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.118, + "weight": 26.48 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_4.q", + "count": 30, + "mean_delta_ms": 0.899, + "std_delta_ms": 0.12, + "weight": 26.47 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_4.k", + "count": 30, + "mean_delta_ms": 0.905, + "std_delta_ms": 0.122, + "weight": 26.44 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_4.v", + "count": 30, + "mean_delta_ms": 0.91, + "std_delta_ms": 0.122, + "weight": 26.44 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_4", + "count": 30, + "mean_delta_ms": 0.917, + "std_delta_ms": 0.123, + "weight": 26.46 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_4.keys", + "count": 30, + "mean_delta_ms": 0.923, + "std_delta_ms": 0.123, + "weight": 26.47 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_4.values", + "count": 30, + "mean_delta_ms": 0.928, + "std_delta_ms": 0.124, + "weight": 26.46 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_4.ffn_up", + "count": 30, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.124, + "weight": 26.48 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_4.ffn_down", + "count": 30, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.124, + "weight": 26.49 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 1.134, + "std_delta_ms": 0.146, + "weight": 26.58 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 1.143, + "std_delta_ms": 0.149, + "weight": 26.54 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 1.149, + "std_delta_ms": 0.149, + "weight": 26.55 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 1.154, + "std_delta_ms": 0.15, + "weight": 26.56 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 1.16, + "std_delta_ms": 0.15, + "weight": 26.56 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 1.165, + "std_delta_ms": 0.151, + "weight": 26.57 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.151, + "weight": 26.57 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.151, + "weight": 26.58 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.152, + "weight": 26.59 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.349, + "std_delta_ms": 0.159, + "weight": 27.74 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.buffer.logits", + "count": 28, + "mean_delta_ms": 1.333, + "std_delta_ms": 0.121, + "weight": 25.67 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_0", + "count": 27, + "mean_delta_ms": 1.35, + "std_delta_ms": 0.124, + "weight": 24.72 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_0.q", + "count": 27, + "mean_delta_ms": 1.356, + "std_delta_ms": 0.125, + "weight": 24.73 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_0.k", + "count": 27, + "mean_delta_ms": 1.361, + "std_delta_ms": 0.125, + "weight": 24.73 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_0.v", + "count": 27, + "mean_delta_ms": 1.369, + "std_delta_ms": 0.131, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_0", + "count": 27, + "mean_delta_ms": 1.376, + "std_delta_ms": 0.132, + "weight": 24.63 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_0.keys", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.kv_cache_0.values", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.layer_0.ffn_up", + "count": 27, + "mean_delta_ms": 1.391, + "std_delta_ms": 0.133, + "weight": 24.64 + }, + { + "source": "server.layer_1", + "target": "server.layer_1.q", + "count": 57, + "mean_delta_ms": 0.667, + "std_delta_ms": 0.701, + "weight": 27.79 + }, + { + "source": "server.layer_1", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.672, + "std_delta_ms": 0.701, + "weight": 27.91 + }, + { + "source": "server.layer_1", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.677, + "std_delta_ms": 0.701, + "weight": 28.01 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.684, + "std_delta_ms": 0.701, + "weight": 28.16 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.69, + "std_delta_ms": 0.701, + "weight": 28.27 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.695, + "std_delta_ms": 0.701, + "weight": 28.37 + }, + { + "source": "server.layer_1", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.699, + "std_delta_ms": 0.701, + "weight": 28.47 + }, + { + "source": "server.layer_1", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.705, + "std_delta_ms": 0.701, + "weight": 28.58 + }, + { + "source": "server.layer_1", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.05 + }, + { + "source": "server.layer_1", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.909, + "std_delta_ms": 0.702, + "weight": 32.16 + }, + { + "source": "server.layer_1", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.915, + "std_delta_ms": 0.702, + "weight": 32.25 + }, + { + "source": "server.layer_1", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.702, + "weight": 32.33 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.927, + "std_delta_ms": 0.702, + "weight": 32.43 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_2.keys", + "count": 56, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.694, + "weight": 31.82 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_2.values", + "count": 56, + "mean_delta_ms": 0.918, + "std_delta_ms": 0.694, + "weight": 31.9 + }, + { + "source": "server.layer_1", + "target": "server.layer_2.ffn_up", + "count": 56, + "mean_delta_ms": 0.923, + "std_delta_ms": 0.694, + "weight": 31.97 + }, + { + "source": "server.layer_1", + "target": "server.layer_2.ffn_down", + "count": 56, + "mean_delta_ms": 0.939, + "std_delta_ms": 0.696, + "weight": 32.17 + }, + { + "source": "server.layer_1", + "target": "server.layer_3", + "count": 51, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.66, + "weight": 31.01 + }, + { + "source": "server.layer_1", + "target": "server.layer_3.q", + "count": 51, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.661, + "weight": 31.1 + }, + { + "source": "server.layer_1", + "target": "server.layer_3.k", + "count": 50, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.653, + "weight": 30.47 + }, + { + "source": "server.layer_1", + "target": "server.layer_3.v", + "count": 50, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.653, + "weight": 30.54 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_3", + "count": 50, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.653, + "weight": 30.62 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_3.keys", + "count": 49, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.645, + "weight": 29.99 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_3.values", + "count": 48, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.635, + "weight": 29.37 + }, + { + "source": "server.layer_1", + "target": "server.layer_3.ffn_up", + "count": 47, + "mean_delta_ms": 0.987, + "std_delta_ms": 0.625, + "weight": 28.78 + }, + { + "source": "server.layer_1", + "target": "server.layer_3.ffn_down", + "count": 47, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.625, + "weight": 28.84 + }, + { + "source": "server.layer_1", + "target": "server.layer_4", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.547, + "weight": 26.85 + }, + { + "source": "server.layer_1", + "target": "server.layer_4.q", + "count": 41, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.547, + "weight": 26.92 + }, + { + "source": "server.layer_1", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.547, + "weight": 26.97 + }, + { + "source": "server.layer_1", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.547, + "weight": 27.01 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_4", + "count": 39, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.516, + "weight": 25.85 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_4.keys", + "count": 38, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.497, + "weight": 25.34 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_4.values", + "count": 38, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.496, + "weight": 25.39 + }, + { + "source": "server.layer_1", + "target": "server.layer_4.ffn_up", + "count": 38, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.496, + "weight": 25.43 + }, + { + "source": "server.layer_1", + "target": "server.layer_4.ffn_down", + "count": 38, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.496, + "weight": 25.48 + }, + { + "source": "server.layer_1", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.955, + "std_delta_ms": 0.144, + "weight": 26.08 + }, + { + "source": "server.layer_1", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.146, + "weight": 26.05 + }, + { + "source": "server.layer_1", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.147, + "weight": 26.05 + }, + { + "source": "server.layer_1", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.147, + "weight": 26.07 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.147, + "weight": 26.08 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.148, + "weight": 26.09 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.148, + "weight": 26.1 + }, + { + "source": "server.layer_1", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.148, + "weight": 26.11 + }, + { + "source": "server.layer_1", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.149, + "weight": 26.12 + }, + { + "source": "server.layer_1", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.169, + "std_delta_ms": 0.155, + "weight": 27.37 + }, + { + "source": "server.layer_1", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.179, + "std_delta_ms": 0.177, + "weight": 25.21 + }, + { + "source": "server.layer_1", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.182, + "weight": 24.31 + }, + { + "source": "server.layer_1", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.182, + "weight": 24.31 + }, + { + "source": "server.layer_1", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.182, + "weight": 24.32 + }, + { + "source": "server.layer_1", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.216, + "std_delta_ms": 0.186, + "weight": 24.28 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.223, + "std_delta_ms": 0.187, + "weight": 24.29 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.187, + "weight": 24.3 + }, + { + "source": "server.layer_1", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.233, + "std_delta_ms": 0.187, + "weight": 24.31 + }, + { + "source": "server.layer_1", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.238, + "std_delta_ms": 0.188, + "weight": 24.31 + }, + { + "source": "server.layer_1", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.243, + "std_delta_ms": 0.188, + "weight": 24.32 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_1.k", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.701, + "weight": 27.75 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.67, + "std_delta_ms": 0.701, + "weight": 27.86 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.677, + "std_delta_ms": 0.701, + "weight": 28.01 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.682, + "std_delta_ms": 0.701, + "weight": 28.13 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.687, + "std_delta_ms": 0.701, + "weight": 28.23 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.692, + "std_delta_ms": 0.701, + "weight": 28.33 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.697, + "std_delta_ms": 0.7, + "weight": 28.44 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.702, + "weight": 31.94 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.06 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.908, + "std_delta_ms": 0.702, + "weight": 32.14 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.702, + "weight": 32.22 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.919, + "std_delta_ms": 0.702, + "weight": 32.32 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.702, + "weight": 32.4 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.93, + "std_delta_ms": 0.702, + "weight": 32.48 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_2.ffn_up", + "count": 56, + "mean_delta_ms": 0.916, + "std_delta_ms": 0.694, + "weight": 31.87 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_2.ffn_down", + "count": 56, + "mean_delta_ms": 0.932, + "std_delta_ms": 0.696, + "weight": 32.06 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_3", + "count": 51, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.66, + "weight": 30.92 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_3.q", + "count": 51, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.66, + "weight": 31.02 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_3.k", + "count": 51, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.66, + "weight": 31.08 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_3.v", + "count": 50, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.653, + "weight": 30.46 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_3", + "count": 50, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.653, + "weight": 30.54 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_3.keys", + "count": 50, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.653, + "weight": 30.6 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_3.values", + "count": 50, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.653, + "weight": 30.66 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_3.ffn_up", + "count": 49, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.644, + "weight": 30.05 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_3.ffn_down", + "count": 49, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.644, + "weight": 30.11 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_4", + "count": 41, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.547, + "weight": 26.79 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_4.q", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.547, + "weight": 26.85 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.547, + "weight": 26.9 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.547, + "weight": 26.95 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.055, + "std_delta_ms": 0.546, + "weight": 27.01 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_4.keys", + "count": 40, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.532, + "weight": 26.44 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_4.values", + "count": 39, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.515, + "weight": 25.89 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_4.ffn_up", + "count": 38, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.496, + "weight": 25.37 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_4.ffn_down", + "count": 38, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.496, + "weight": 25.41 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.143, + "weight": 26.07 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.146, + "weight": 26.04 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.962, + "std_delta_ms": 0.146, + "weight": 26.04 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.967, + "std_delta_ms": 0.146, + "weight": 26.06 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.973, + "std_delta_ms": 0.147, + "weight": 26.07 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.147, + "weight": 26.08 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.148, + "weight": 26.09 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.148, + "weight": 26.1 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.148, + "weight": 26.11 + }, + { + "source": "server.layer_1.q", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.162, + "std_delta_ms": 0.155, + "weight": 27.36 + }, + { + "source": "server.layer_1.q", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.172, + "std_delta_ms": 0.177, + "weight": 25.2 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.181, + "weight": 24.3 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.196, + "std_delta_ms": 0.182, + "weight": 24.31 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.2, + "std_delta_ms": 0.182, + "weight": 24.32 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.186, + "weight": 24.27 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.216, + "std_delta_ms": 0.186, + "weight": 24.28 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.221, + "std_delta_ms": 0.187, + "weight": 24.29 + }, + { + "source": "server.layer_1.q", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.226, + "std_delta_ms": 0.187, + "weight": 24.3 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.187, + "weight": 24.3 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.236, + "std_delta_ms": 0.187, + "weight": 24.31 + }, + { + "source": "server.layer_1.q", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.127, + "weight": 24.74 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_1.v", + "count": 57, + "mean_delta_ms": 0.664, + "std_delta_ms": 0.701, + "weight": 27.75 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.701, + "weight": 27.9 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.677, + "std_delta_ms": 0.7, + "weight": 28.01 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.682, + "std_delta_ms": 0.7, + "weight": 28.12 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.687, + "std_delta_ms": 0.7, + "weight": 28.22 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.692, + "std_delta_ms": 0.7, + "weight": 28.32 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.702, + "weight": 31.86 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.702, + "weight": 31.97 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.05 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.908, + "std_delta_ms": 0.702, + "weight": 32.14 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.914, + "std_delta_ms": 0.702, + "weight": 32.24 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.919, + "std_delta_ms": 0.702, + "weight": 32.32 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.702, + "weight": 32.4 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.702, + "weight": 32.47 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_2.ffn_down", + "count": 56, + "mean_delta_ms": 0.926, + "std_delta_ms": 0.695, + "weight": 31.98 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_3", + "count": 51, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.66, + "weight": 30.86 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_3.q", + "count": 51, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.66, + "weight": 30.95 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_3.k", + "count": 51, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.66, + "weight": 31.02 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_3.v", + "count": 51, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.661, + "weight": 31.09 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_3", + "count": 50, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.653, + "weight": 30.48 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_3.keys", + "count": 50, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.653, + "weight": 30.54 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_3.values", + "count": 50, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.653, + "weight": 30.6 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_3.ffn_up", + "count": 50, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.652, + "weight": 30.67 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_3.ffn_down", + "count": 50, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.652, + "weight": 30.73 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_4", + "count": 41, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.548, + "weight": 26.73 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_4.q", + "count": 41, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.547, + "weight": 26.8 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.547, + "weight": 26.85 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.043, + "std_delta_ms": 0.547, + "weight": 26.9 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.547, + "weight": 26.96 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_4.keys", + "count": 41, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.546, + "weight": 27.02 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_4.values", + "count": 40, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.532, + "weight": 26.44 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_4.ffn_up", + "count": 39, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.515, + "weight": 25.89 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_4.ffn_down", + "count": 38, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.496, + "weight": 25.37 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.942, + "std_delta_ms": 0.143, + "weight": 26.05 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.145, + "weight": 26.02 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.146, + "weight": 26.03 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.962, + "std_delta_ms": 0.146, + "weight": 26.04 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.147, + "weight": 26.06 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.147, + "weight": 26.07 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.147, + "weight": 26.08 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.147, + "weight": 26.09 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.148, + "weight": 26.1 + }, + { + "source": "server.layer_1.k", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.156, + "std_delta_ms": 0.154, + "weight": 27.35 + }, + { + "source": "server.layer_1.k", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.167, + "std_delta_ms": 0.176, + "weight": 25.19 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.184, + "std_delta_ms": 0.181, + "weight": 24.29 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.181, + "weight": 24.3 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.182, + "weight": 24.31 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.185, + "weight": 24.27 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.186, + "weight": 24.27 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.216, + "std_delta_ms": 0.186, + "weight": 24.28 + }, + { + "source": "server.layer_1.k", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.186, + "weight": 24.29 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.225, + "std_delta_ms": 0.187, + "weight": 24.29 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.23, + "std_delta_ms": 0.187, + "weight": 24.3 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.38, + "std_delta_ms": 0.126, + "weight": 24.74 + }, + { + "source": "server.layer_1.k", + "target": "server.layer_1.q", + "count": 27, + "mean_delta_ms": 1.387, + "std_delta_ms": 0.126, + "weight": 24.75 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_1", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.701, + "weight": 27.79 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.672, + "std_delta_ms": 0.7, + "weight": 27.9 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.677, + "std_delta_ms": 0.7, + "weight": 28.01 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.681, + "std_delta_ms": 0.7, + "weight": 28.11 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.687, + "std_delta_ms": 0.7, + "weight": 28.22 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.702, + "weight": 31.89 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.702, + "weight": 31.98 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.06 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.909, + "std_delta_ms": 0.702, + "weight": 32.16 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.914, + "std_delta_ms": 0.702, + "weight": 32.25 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.919, + "std_delta_ms": 0.702, + "weight": 32.32 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.924, + "std_delta_ms": 0.702, + "weight": 32.4 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.704, + "weight": 32.6 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_3", + "count": 51, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.66, + "weight": 30.8 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_3.q", + "count": 51, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.66, + "weight": 30.89 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_3.k", + "count": 51, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.66, + "weight": 30.96 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_3.v", + "count": 51, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.661, + "weight": 31.03 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_3", + "count": 51, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.661, + "weight": 31.11 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_3.keys", + "count": 50, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.653, + "weight": 30.48 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_3.values", + "count": 50, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.653, + "weight": 30.54 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_3.ffn_up", + "count": 50, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.652, + "weight": 30.62 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_3.ffn_down", + "count": 50, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.652, + "weight": 30.68 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_4", + "count": 41, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.548, + "weight": 26.69 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_4.q", + "count": 41, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.547, + "weight": 26.75 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.547, + "weight": 26.81 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.547, + "weight": 26.85 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.547, + "weight": 26.92 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_4.keys", + "count": 41, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.546, + "weight": 26.97 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_4.values", + "count": 41, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.546, + "weight": 27.02 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_4.ffn_up", + "count": 39, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.516, + "weight": 25.84 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_4.ffn_down", + "count": 39, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.515, + "weight": 25.89 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.937, + "std_delta_ms": 0.143, + "weight": 26.04 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.145, + "weight": 26.01 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.146, + "weight": 26.02 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.146, + "weight": 26.03 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.146, + "weight": 26.04 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.147, + "weight": 26.05 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.147, + "weight": 26.06 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.978, + "std_delta_ms": 0.147, + "weight": 26.07 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.148, + "weight": 26.08 + }, + { + "source": "server.layer_1.v", + "target": "server.buffer", + "count": 31, + "mean_delta_ms": 1.151, + "std_delta_ms": 0.154, + "weight": 27.34 + }, + { + "source": "server.layer_1.v", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.162, + "std_delta_ms": 0.176, + "weight": 25.18 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.179, + "std_delta_ms": 0.181, + "weight": 24.28 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.181, + "weight": 24.29 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.181, + "weight": 24.3 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.185, + "weight": 24.26 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.205, + "std_delta_ms": 0.186, + "weight": 24.26 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.186, + "weight": 24.27 + }, + { + "source": "server.layer_1.v", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.186, + "weight": 24.28 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.187, + "weight": 24.28 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.225, + "std_delta_ms": 0.187, + "weight": 24.29 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.375, + "std_delta_ms": 0.126, + "weight": 24.74 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_1.q", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.126, + "weight": 24.74 + }, + { + "source": "server.layer_1.v", + "target": "server.layer_1.k", + "count": 27, + "mean_delta_ms": 1.387, + "std_delta_ms": 0.126, + "weight": 24.74 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_1.keys", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.7, + "weight": 27.75 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.67, + "std_delta_ms": 0.7, + "weight": 27.86 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.674, + "std_delta_ms": 0.7, + "weight": 27.96 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.68, + "std_delta_ms": 0.7, + "weight": 28.07 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.702, + "weight": 31.66 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.702, + "weight": 31.87 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.702, + "weight": 31.95 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.05 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.702, + "weight": 32.14 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.912, + "std_delta_ms": 0.702, + "weight": 32.22 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.917, + "std_delta_ms": 0.702, + "weight": 32.29 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.704, + "weight": 32.49 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_3", + "count": 51, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.66, + "weight": 30.71 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_3.q", + "count": 51, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.66, + "weight": 30.81 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_3.k", + "count": 51, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.66, + "weight": 30.88 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_3.v", + "count": 51, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.661, + "weight": 30.95 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_3", + "count": 51, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.661, + "weight": 31.03 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_3.keys", + "count": 51, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.66, + "weight": 31.09 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_3.values", + "count": 50, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.653, + "weight": 30.46 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_3.ffn_up", + "count": 50, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.652, + "weight": 30.54 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_3.ffn_down", + "count": 50, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.652, + "weight": 30.6 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_4", + "count": 41, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.548, + "weight": 26.62 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_4.q", + "count": 41, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.547, + "weight": 26.69 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.547, + "weight": 26.74 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.547, + "weight": 26.79 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.546, + "weight": 26.86 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_4.keys", + "count": 41, + "mean_delta_ms": 1.043, + "std_delta_ms": 0.546, + "weight": 26.91 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_4.values", + "count": 41, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.546, + "weight": 26.96 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_4.ffn_up", + "count": 41, + "mean_delta_ms": 1.054, + "std_delta_ms": 0.546, + "weight": 27.01 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_4.ffn_down", + "count": 39, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.515, + "weight": 25.83 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.93, + "std_delta_ms": 0.143, + "weight": 26.01 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.939, + "std_delta_ms": 0.145, + "weight": 25.98 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.945, + "std_delta_ms": 0.146, + "weight": 25.99 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.95, + "std_delta_ms": 0.146, + "weight": 26.0 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.146, + "weight": 26.01 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.147, + "weight": 26.03 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.967, + "std_delta_ms": 0.147, + "weight": 26.04 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.147, + "weight": 26.05 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.977, + "std_delta_ms": 0.148, + "weight": 26.06 + }, + { + "source": "server.kv_cache_1", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.212, + "weight": 27.09 + }, + { + "source": "server.kv_cache_1", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.155, + "std_delta_ms": 0.176, + "weight": 25.16 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.172, + "std_delta_ms": 0.181, + "weight": 24.26 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.178, + "std_delta_ms": 0.181, + "weight": 24.26 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.183, + "std_delta_ms": 0.181, + "weight": 24.27 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.185, + "weight": 24.23 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.186, + "weight": 24.24 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.186, + "weight": 24.25 + }, + { + "source": "server.kv_cache_1", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.186, + "weight": 24.26 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.213, + "std_delta_ms": 0.187, + "weight": 24.26 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.218, + "std_delta_ms": 0.187, + "weight": 24.27 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.368, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_1.q", + "count": 27, + "mean_delta_ms": 1.375, + "std_delta_ms": 0.126, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_1.k", + "count": 27, + "mean_delta_ms": 1.38, + "std_delta_ms": 0.127, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1", + "target": "server.layer_1.v", + "count": 27, + "mean_delta_ms": 1.385, + "std_delta_ms": 0.127, + "weight": 24.74 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_1.values", + "count": 57, + "mean_delta_ms": 0.664, + "std_delta_ms": 0.7, + "weight": 27.74 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.669, + "std_delta_ms": 0.7, + "weight": 27.85 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.674, + "std_delta_ms": 0.7, + "weight": 27.96 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.702, + "weight": 31.58 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.69 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.702, + "weight": 31.87 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.702, + "weight": 31.96 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.05 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.702, + "weight": 32.13 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.912, + "std_delta_ms": 0.702, + "weight": 32.21 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.927, + "std_delta_ms": 0.704, + "weight": 32.41 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_3", + "count": 51, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.66, + "weight": 30.65 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_3.q", + "count": 51, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.66, + "weight": 30.74 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_3.k", + "count": 51, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.66, + "weight": 30.81 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_3.v", + "count": 51, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.66, + "weight": 30.89 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_3", + "count": 51, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.66, + "weight": 30.96 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_3.keys", + "count": 51, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.66, + "weight": 31.03 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_3.values", + "count": 51, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.66, + "weight": 31.09 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_3.ffn_up", + "count": 50, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.652, + "weight": 30.47 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_3.ffn_down", + "count": 50, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.652, + "weight": 30.53 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_4", + "count": 42, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.562, + "weight": 27.2 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_4.q", + "count": 41, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.547, + "weight": 26.64 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.547, + "weight": 26.69 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.547, + "weight": 26.74 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.547, + "weight": 26.81 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_4.keys", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.546, + "weight": 26.86 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_4.values", + "count": 41, + "mean_delta_ms": 1.043, + "std_delta_ms": 0.546, + "weight": 26.91 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_4.ffn_up", + "count": 41, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.546, + "weight": 26.96 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_4.ffn_down", + "count": 41, + "mean_delta_ms": 1.053, + "std_delta_ms": 0.546, + "weight": 27.0 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.143, + "weight": 25.99 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.145, + "weight": 25.96 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.939, + "std_delta_ms": 0.146, + "weight": 25.97 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.944, + "std_delta_ms": 0.146, + "weight": 25.99 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.95, + "std_delta_ms": 0.146, + "weight": 26.0 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.147, + "weight": 26.01 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.147, + "weight": 26.02 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.147, + "weight": 26.03 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.148, + "weight": 26.04 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 1.165, + "std_delta_ms": 0.212, + "weight": 27.07 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.149, + "std_delta_ms": 0.176, + "weight": 25.14 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.167, + "std_delta_ms": 0.181, + "weight": 24.24 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.172, + "std_delta_ms": 0.181, + "weight": 24.25 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.177, + "std_delta_ms": 0.181, + "weight": 24.26 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.185, + "weight": 24.22 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.186, + "weight": 24.23 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.186, + "weight": 24.23 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.186, + "weight": 24.24 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.207, + "std_delta_ms": 0.187, + "weight": 24.25 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.187, + "weight": 24.26 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.362, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_1.q", + "count": 27, + "mean_delta_ms": 1.369, + "std_delta_ms": 0.126, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_1.k", + "count": 27, + "mean_delta_ms": 1.374, + "std_delta_ms": 0.126, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.layer_1.v", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.126, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.kv_cache_1", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.127, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_1.ffn_up", + "count": 57, + "mean_delta_ms": 0.664, + "std_delta_ms": 0.7, + "weight": 27.74 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.669, + "std_delta_ms": 0.7, + "weight": 27.85 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.867, + "std_delta_ms": 0.702, + "weight": 31.5 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.61 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.7 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.79 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.702, + "weight": 31.89 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.702, + "weight": 31.98 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.05 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.702, + "weight": 32.13 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.922, + "std_delta_ms": 0.704, + "weight": 32.34 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_3", + "count": 52, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.668, + "weight": 31.27 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_3.q", + "count": 51, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.66, + "weight": 30.68 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_3.k", + "count": 51, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.66, + "weight": 30.75 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_3.v", + "count": 51, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.66, + "weight": 30.83 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_3", + "count": 51, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.66, + "weight": 30.9 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_3.keys", + "count": 51, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.66, + "weight": 30.97 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_3.values", + "count": 51, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.66, + "weight": 31.03 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_3.ffn_up", + "count": 51, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.66, + "weight": 31.11 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_3.ffn_down", + "count": 50, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.652, + "weight": 30.48 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_4", + "count": 42, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.562, + "weight": 27.15 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_4.q", + "count": 41, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.547, + "weight": 26.59 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.547, + "weight": 26.65 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.547, + "weight": 26.69 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.547, + "weight": 26.76 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_4.keys", + "count": 41, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.546, + "weight": 26.82 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_4.values", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.546, + "weight": 26.87 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_4.ffn_up", + "count": 41, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.546, + "weight": 26.91 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_4.ffn_down", + "count": 41, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.546, + "weight": 26.96 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.143, + "weight": 25.97 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.145, + "weight": 25.95 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.146, + "weight": 25.96 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.939, + "std_delta_ms": 0.146, + "weight": 25.97 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.945, + "std_delta_ms": 0.146, + "weight": 25.98 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.147, + "weight": 25.99 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.147, + "weight": 26.0 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.147, + "weight": 26.02 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.148, + "weight": 26.02 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 1.16, + "std_delta_ms": 0.212, + "weight": 27.05 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.144, + "std_delta_ms": 0.176, + "weight": 25.13 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.162, + "std_delta_ms": 0.181, + "weight": 24.23 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.167, + "std_delta_ms": 0.181, + "weight": 24.24 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.172, + "std_delta_ms": 0.181, + "weight": 24.25 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.185, + "weight": 24.21 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.187, + "std_delta_ms": 0.186, + "weight": 24.21 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.186, + "weight": 24.22 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.186, + "weight": 24.23 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.187, + "weight": 24.24 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.207, + "std_delta_ms": 0.187, + "weight": 24.25 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.357, + "std_delta_ms": 0.125, + "weight": 24.72 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_1.q", + "count": 27, + "mean_delta_ms": 1.364, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_1.k", + "count": 27, + "mean_delta_ms": 1.369, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.layer_1.v", + "count": 27, + "mean_delta_ms": 1.374, + "std_delta_ms": 0.126, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_1", + "count": 27, + "mean_delta_ms": 1.381, + "std_delta_ms": 0.127, + "weight": 24.73 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.kv_cache_1.keys", + "count": 27, + "mean_delta_ms": 1.387, + "std_delta_ms": 0.127, + "weight": 24.73 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_1.ffn_down", + "count": 57, + "mean_delta_ms": 0.664, + "std_delta_ms": 0.7, + "weight": 27.75 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.862, + "std_delta_ms": 0.702, + "weight": 31.42 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.702, + "weight": 31.54 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.62 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.702, + "weight": 31.71 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.702, + "weight": 31.81 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.702, + "weight": 31.9 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.702, + "weight": 31.98 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.06 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.917, + "std_delta_ms": 0.704, + "weight": 32.26 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_3", + "count": 53, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.675, + "weight": 31.91 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_3.q", + "count": 52, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.668, + "weight": 31.31 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_3.k", + "count": 51, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.66, + "weight": 30.69 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_3.v", + "count": 51, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.66, + "weight": 30.77 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_3", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.66, + "weight": 30.84 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_3.keys", + "count": 51, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.66, + "weight": 30.91 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_3.values", + "count": 51, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.66, + "weight": 30.97 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_3.ffn_up", + "count": 51, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.66, + "weight": 31.05 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_3.ffn_down", + "count": 51, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.66, + "weight": 31.11 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_4", + "count": 42, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.562, + "weight": 27.1 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_4.q", + "count": 42, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.562, + "weight": 27.17 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_4.k", + "count": 41, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.547, + "weight": 26.6 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.547, + "weight": 26.65 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.547, + "weight": 26.72 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_4.keys", + "count": 41, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.546, + "weight": 26.77 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_4.values", + "count": 41, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.546, + "weight": 26.82 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_4.ffn_up", + "count": 41, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.546, + "weight": 26.87 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_4.ffn_down", + "count": 41, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.546, + "weight": 26.91 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.915, + "std_delta_ms": 0.142, + "weight": 25.96 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.924, + "std_delta_ms": 0.145, + "weight": 25.93 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.146, + "weight": 25.94 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.146, + "weight": 25.95 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.146, + "weight": 25.97 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.146, + "weight": 25.98 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.147, + "weight": 25.99 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.147, + "weight": 26.0 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.147, + "weight": 26.01 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 1.156, + "std_delta_ms": 0.212, + "weight": 27.03 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.139, + "std_delta_ms": 0.176, + "weight": 25.12 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.157, + "std_delta_ms": 0.181, + "weight": 24.22 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.162, + "std_delta_ms": 0.181, + "weight": 24.23 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.167, + "std_delta_ms": 0.181, + "weight": 24.24 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.185, + "weight": 24.19 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.183, + "std_delta_ms": 0.186, + "weight": 24.2 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.186, + "weight": 24.21 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.186, + "weight": 24.22 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.187, + "weight": 24.22 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.187, + "weight": 24.24 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.352, + "std_delta_ms": 0.125, + "weight": 24.71 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_1.q", + "count": 27, + "mean_delta_ms": 1.359, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_1.k", + "count": 27, + "mean_delta_ms": 1.364, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.layer_1.v", + "count": 27, + "mean_delta_ms": 1.369, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_1", + "count": 27, + "mean_delta_ms": 1.376, + "std_delta_ms": 0.127, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_1.keys", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.127, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.kv_cache_1.values", + "count": 27, + "mean_delta_ms": 1.387, + "std_delta_ms": 0.128, + "weight": 24.73 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_2", + "count": 57, + "mean_delta_ms": 0.857, + "std_delta_ms": 0.702, + "weight": 31.33 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.864, + "std_delta_ms": 0.702, + "weight": 31.45 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.702, + "weight": 31.54 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.875, + "std_delta_ms": 0.702, + "weight": 31.63 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.702, + "weight": 31.73 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.702, + "weight": 31.82 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.702, + "weight": 31.9 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.702, + "weight": 31.97 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.912, + "std_delta_ms": 0.703, + "weight": 32.18 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_3", + "count": 53, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.675, + "weight": 31.85 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_3.q", + "count": 53, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.675, + "weight": 31.94 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_3.k", + "count": 52, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.668, + "weight": 31.32 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_3.v", + "count": 51, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.66, + "weight": 30.7 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_3", + "count": 51, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.66, + "weight": 30.78 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_3.keys", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.66, + "weight": 30.85 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_3.values", + "count": 51, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.66, + "weight": 30.91 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_3.ffn_up", + "count": 51, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.66, + "weight": 30.99 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_3.ffn_down", + "count": 51, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.66, + "weight": 31.05 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_4", + "count": 42, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.562, + "weight": 27.05 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_4.q", + "count": 42, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.562, + "weight": 27.12 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_4.k", + "count": 42, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.562, + "weight": 27.18 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_4.v", + "count": 41, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.547, + "weight": 26.6 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_4", + "count": 41, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.547, + "weight": 26.67 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_4.keys", + "count": 41, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.547, + "weight": 26.72 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_4.values", + "count": 41, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.546, + "weight": 26.77 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_4.ffn_up", + "count": 41, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.546, + "weight": 26.82 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_4.ffn_down", + "count": 41, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.546, + "weight": 26.87 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_5", + "count": 30, + "mean_delta_ms": 0.909, + "std_delta_ms": 0.142, + "weight": 25.94 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_5.q", + "count": 30, + "mean_delta_ms": 0.919, + "std_delta_ms": 0.145, + "weight": 25.91 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_5.k", + "count": 30, + "mean_delta_ms": 0.924, + "std_delta_ms": 0.145, + "weight": 25.92 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_5.v", + "count": 30, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.145, + "weight": 25.94 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_5", + "count": 30, + "mean_delta_ms": 0.935, + "std_delta_ms": 0.146, + "weight": 25.95 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_5.keys", + "count": 30, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.146, + "weight": 25.96 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_5.values", + "count": 30, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.147, + "weight": 25.97 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_5.ffn_up", + "count": 30, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.147, + "weight": 25.99 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_5.ffn_down", + "count": 30, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.147, + "weight": 25.99 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 1.15, + "std_delta_ms": 0.212, + "weight": 27.02 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.buffer.logits", + "count": 29, + "mean_delta_ms": 1.134, + "std_delta_ms": 0.176, + "weight": 25.1 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_0", + "count": 28, + "mean_delta_ms": 1.151, + "std_delta_ms": 0.18, + "weight": 24.21 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_0.q", + "count": 28, + "mean_delta_ms": 1.157, + "std_delta_ms": 0.181, + "weight": 24.21 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_0.k", + "count": 28, + "mean_delta_ms": 1.162, + "std_delta_ms": 0.181, + "weight": 24.23 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_0.v", + "count": 28, + "mean_delta_ms": 1.17, + "std_delta_ms": 0.185, + "weight": 24.18 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_0", + "count": 28, + "mean_delta_ms": 1.177, + "std_delta_ms": 0.185, + "weight": 24.19 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_0.keys", + "count": 28, + "mean_delta_ms": 1.183, + "std_delta_ms": 0.186, + "weight": 24.2 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_0.values", + "count": 28, + "mean_delta_ms": 1.187, + "std_delta_ms": 0.186, + "weight": 24.21 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_0.ffn_up", + "count": 28, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.186, + "weight": 24.21 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_0.ffn_down", + "count": 28, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.187, + "weight": 24.22 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_1", + "count": 27, + "mean_delta_ms": 1.347, + "std_delta_ms": 0.125, + "weight": 24.71 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_1.q", + "count": 27, + "mean_delta_ms": 1.354, + "std_delta_ms": 0.125, + "weight": 24.71 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_1.k", + "count": 27, + "mean_delta_ms": 1.359, + "std_delta_ms": 0.126, + "weight": 24.71 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_1.v", + "count": 27, + "mean_delta_ms": 1.364, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_1", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.126, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_1.keys", + "count": 27, + "mean_delta_ms": 1.376, + "std_delta_ms": 0.127, + "weight": 24.72 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.kv_cache_1.values", + "count": 27, + "mean_delta_ms": 1.381, + "std_delta_ms": 0.127, + "weight": 24.73 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.layer_1.ffn_up", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.127, + "weight": 24.73 + }, + { + "source": "server.layer_2", + "target": "server.layer_2.q", + "count": 57, + "mean_delta_ms": 0.668, + "std_delta_ms": 0.702, + "weight": 27.8 + }, + { + "source": "server.layer_2", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.674, + "std_delta_ms": 0.702, + "weight": 27.91 + }, + { + "source": "server.layer_2", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.679, + "std_delta_ms": 0.702, + "weight": 28.03 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.685, + "std_delta_ms": 0.702, + "weight": 28.16 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.702, + "weight": 28.28 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.696, + "std_delta_ms": 0.702, + "weight": 28.38 + }, + { + "source": "server.layer_2", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.701, + "std_delta_ms": 0.702, + "weight": 28.48 + }, + { + "source": "server.layer_2", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.717, + "std_delta_ms": 0.704, + "weight": 28.75 + }, + { + "source": "server.layer_2", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.703, + "weight": 31.91 + }, + { + "source": "server.layer_2", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.703, + "weight": 32.03 + }, + { + "source": "server.layer_2", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.703, + "weight": 32.12 + }, + { + "source": "server.layer_2", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.914, + "std_delta_ms": 0.703, + "weight": 32.22 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.703, + "weight": 32.31 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.703, + "weight": 32.39 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.931, + "std_delta_ms": 0.703, + "weight": 32.47 + }, + { + "source": "server.layer_2", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.936, + "std_delta_ms": 0.702, + "weight": 32.57 + }, + { + "source": "server.layer_2", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.702, + "weight": 32.65 + }, + { + "source": "server.layer_2", + "target": "server.layer_4", + "count": 52, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.664, + "weight": 31.64 + }, + { + "source": "server.layer_2", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.664, + "weight": 31.73 + }, + { + "source": "server.layer_2", + "target": "server.layer_4.k", + "count": 51, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.656, + "weight": 31.1 + }, + { + "source": "server.layer_2", + "target": "server.layer_4.v", + "count": 51, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.656, + "weight": 31.16 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_4", + "count": 51, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.656, + "weight": 31.24 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_4.keys", + "count": 51, + "mean_delta_ms": 1.043, + "std_delta_ms": 0.656, + "weight": 31.31 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_4.values", + "count": 51, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.656, + "weight": 31.37 + }, + { + "source": "server.layer_2", + "target": "server.layer_4.ffn_up", + "count": 50, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.648, + "weight": 30.74 + }, + { + "source": "server.layer_2", + "target": "server.layer_4.ffn_down", + "count": 50, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.648, + "weight": 30.79 + }, + { + "source": "server.layer_2", + "target": "server.layer_5", + "count": 41, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.545, + "weight": 26.87 + }, + { + "source": "server.layer_2", + "target": "server.layer_5.q", + "count": 41, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.544, + "weight": 26.95 + }, + { + "source": "server.layer_2", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.544, + "weight": 27.0 + }, + { + "source": "server.layer_2", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.054, + "std_delta_ms": 0.544, + "weight": 27.04 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_5", + "count": 40, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.529, + "weight": 26.48 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_5.keys", + "count": 40, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.529, + "weight": 26.53 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_5.values", + "count": 40, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.529, + "weight": 26.57 + }, + { + "source": "server.layer_2", + "target": "server.layer_5.ffn_up", + "count": 40, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.529, + "weight": 26.62 + }, + { + "source": "server.layer_2", + "target": "server.layer_5.ffn_down", + "count": 40, + "mean_delta_ms": 1.057, + "std_delta_ms": 0.529, + "weight": 26.66 + }, + { + "source": "server.layer_2", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.178, + "weight": 26.97 + }, + { + "source": "server.layer_2", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.2, + "weight": 24.84 + }, + { + "source": "server.layer_2", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.206, + "weight": 23.97 + }, + { + "source": "server.layer_2", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.207, + "weight": 23.98 + }, + { + "source": "server.layer_2", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.207, + "weight": 24.0 + }, + { + "source": "server.layer_2", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.209, + "weight": 24.0 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.209, + "weight": 24.01 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.213, + "weight": 23.98 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.213, + "weight": 23.99 + }, + { + "source": "server.layer_2", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.213, + "weight": 24.0 + }, + { + "source": "server.layer_2", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.214, + "weight": 24.01 + }, + { + "source": "server.layer_2", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.207, + "std_delta_ms": 0.217, + "weight": 24.59 + }, + { + "source": "server.layer_2", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.214, + "std_delta_ms": 0.217, + "weight": 24.59 + }, + { + "source": "server.layer_2", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.219, + "std_delta_ms": 0.218, + "weight": 24.61 + }, + { + "source": "server.layer_2", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.224, + "std_delta_ms": 0.218, + "weight": 24.62 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.218, + "weight": 24.63 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.237, + "std_delta_ms": 0.219, + "weight": 24.64 + }, + { + "source": "server.layer_2", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.241, + "std_delta_ms": 0.219, + "weight": 24.65 + }, + { + "source": "server.layer_2", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.246, + "std_delta_ms": 0.219, + "weight": 24.66 + }, + { + "source": "server.layer_2", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.251, + "std_delta_ms": 0.22, + "weight": 24.67 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_2.k", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.702, + "weight": 27.76 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.672, + "std_delta_ms": 0.702, + "weight": 27.88 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.702, + "weight": 28.01 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.684, + "std_delta_ms": 0.702, + "weight": 28.13 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.689, + "std_delta_ms": 0.702, + "weight": 28.23 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.694, + "std_delta_ms": 0.702, + "weight": 28.33 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.709, + "std_delta_ms": 0.704, + "weight": 28.61 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.703, + "weight": 31.79 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.703, + "weight": 31.91 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.9, + "std_delta_ms": 0.703, + "weight": 32.0 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.703, + "weight": 32.1 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.703, + "weight": 32.2 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.918, + "std_delta_ms": 0.703, + "weight": 32.28 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.923, + "std_delta_ms": 0.703, + "weight": 32.36 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.702, + "weight": 32.46 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.702, + "weight": 32.54 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_4", + "count": 52, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.664, + "weight": 31.55 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.664, + "weight": 31.64 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_4.k", + "count": 52, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.664, + "weight": 31.71 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_4.v", + "count": 51, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.656, + "weight": 31.08 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_4", + "count": 51, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.656, + "weight": 31.16 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_4.keys", + "count": 51, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.656, + "weight": 31.23 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_4.values", + "count": 51, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.656, + "weight": 31.29 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_4.ffn_up", + "count": 51, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.656, + "weight": 31.35 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_4.ffn_down", + "count": 50, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.648, + "weight": 30.71 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_5", + "count": 41, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.545, + "weight": 26.8 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_5.q", + "count": 41, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.545, + "weight": 26.88 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.544, + "weight": 26.93 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.544, + "weight": 26.98 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_5", + "count": 41, + "mean_delta_ms": 1.053, + "std_delta_ms": 0.544, + "weight": 27.03 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_5.keys", + "count": 40, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.529, + "weight": 26.46 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_5.values", + "count": 40, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.529, + "weight": 26.51 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_5.ffn_up", + "count": 40, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.529, + "weight": 26.55 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_5.ffn_down", + "count": 40, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.529, + "weight": 26.6 + }, + { + "source": "server.layer_2.q", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.177, + "weight": 26.96 + }, + { + "source": "server.layer_2.q", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.199, + "weight": 24.83 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.205, + "weight": 23.96 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.206, + "weight": 23.97 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.206, + "weight": 23.99 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.208, + "weight": 23.99 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.209, + "weight": 24.0 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.212, + "weight": 23.96 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.212, + "weight": 23.97 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.213, + "weight": 23.98 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.213, + "weight": 24.0 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.2, + "std_delta_ms": 0.216, + "weight": 24.58 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.217, + "weight": 24.59 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.217, + "weight": 24.6 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.217, + "std_delta_ms": 0.217, + "weight": 24.61 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.224, + "std_delta_ms": 0.218, + "weight": 24.62 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.229, + "std_delta_ms": 0.218, + "weight": 24.63 + }, + { + "source": "server.layer_2.q", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.234, + "std_delta_ms": 0.218, + "weight": 24.64 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.239, + "std_delta_ms": 0.218, + "weight": 24.65 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.244, + "std_delta_ms": 0.219, + "weight": 24.66 + }, + { + "source": "server.layer_2.q", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.124, + "weight": 24.78 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_2.v", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.702, + "weight": 27.76 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.673, + "std_delta_ms": 0.702, + "weight": 27.89 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.702, + "weight": 28.01 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.702, + "weight": 28.12 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.688, + "std_delta_ms": 0.702, + "weight": 28.22 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.704, + "std_delta_ms": 0.704, + "weight": 28.5 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.703, + "weight": 31.71 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.703, + "weight": 31.83 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.703, + "weight": 31.92 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.703, + "weight": 32.02 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.703, + "weight": 32.12 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.703, + "weight": 32.2 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.918, + "std_delta_ms": 0.703, + "weight": 32.28 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.923, + "std_delta_ms": 0.702, + "weight": 32.38 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.702, + "weight": 32.46 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_4", + "count": 52, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.664, + "weight": 31.49 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.664, + "weight": 31.57 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_4.k", + "count": 52, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.664, + "weight": 31.65 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_4.v", + "count": 52, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.664, + "weight": 31.71 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_4", + "count": 51, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.656, + "weight": 31.1 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_4.keys", + "count": 51, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.656, + "weight": 31.16 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_4.values", + "count": 51, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.656, + "weight": 31.23 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_4.ffn_up", + "count": 51, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.656, + "weight": 31.29 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_4.ffn_down", + "count": 51, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.656, + "weight": 31.34 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_5", + "count": 41, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.545, + "weight": 26.75 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_5.q", + "count": 41, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.545, + "weight": 26.83 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.545, + "weight": 26.88 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.544, + "weight": 26.93 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_5", + "count": 41, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.544, + "weight": 26.98 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_5.keys", + "count": 41, + "mean_delta_ms": 1.053, + "std_delta_ms": 0.544, + "weight": 27.03 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_5.values", + "count": 40, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.529, + "weight": 26.46 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_5.ffn_up", + "count": 40, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.529, + "weight": 26.5 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_5.ffn_down", + "count": 40, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.529, + "weight": 26.55 + }, + { + "source": "server.layer_2.k", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.177, + "weight": 26.94 + }, + { + "source": "server.layer_2.k", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.199, + "weight": 24.81 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.205, + "weight": 23.94 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.206, + "weight": 23.95 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.206, + "weight": 23.97 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.207, + "weight": 23.97 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 0.995, + "std_delta_ms": 0.208, + "weight": 23.98 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.211, + "weight": 23.94 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.212, + "weight": 23.96 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.212, + "weight": 23.97 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.213, + "weight": 23.98 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.194, + "std_delta_ms": 0.215, + "weight": 24.57 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.216, + "weight": 24.57 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.217, + "weight": 24.58 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.211, + "std_delta_ms": 0.217, + "weight": 24.6 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.218, + "std_delta_ms": 0.217, + "weight": 24.61 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.224, + "std_delta_ms": 0.218, + "weight": 24.62 + }, + { + "source": "server.layer_2.k", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.229, + "std_delta_ms": 0.218, + "weight": 24.63 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.233, + "std_delta_ms": 0.218, + "weight": 24.64 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.238, + "std_delta_ms": 0.218, + "weight": 24.65 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.383, + "std_delta_ms": 0.124, + "weight": 24.78 + }, + { + "source": "server.layer_2.k", + "target": "server.layer_2.q", + "count": 27, + "mean_delta_ms": 1.39, + "std_delta_ms": 0.125, + "weight": 24.77 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_2", + "count": 57, + "mean_delta_ms": 0.667, + "std_delta_ms": 0.702, + "weight": 27.78 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.673, + "std_delta_ms": 0.702, + "weight": 27.9 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.702, + "weight": 28.0 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.702, + "weight": 28.11 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.698, + "std_delta_ms": 0.704, + "weight": 28.38 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.875, + "std_delta_ms": 0.703, + "weight": 31.62 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.703, + "weight": 31.74 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.703, + "weight": 31.83 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.703, + "weight": 31.93 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.703, + "weight": 32.03 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.907, + "std_delta_ms": 0.703, + "weight": 32.12 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.912, + "std_delta_ms": 0.703, + "weight": 32.2 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.918, + "std_delta_ms": 0.702, + "weight": 32.29 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.923, + "std_delta_ms": 0.702, + "weight": 32.37 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_4", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.664, + "weight": 31.42 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.664, + "weight": 31.51 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_4.k", + "count": 52, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.664, + "weight": 31.58 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_4.v", + "count": 52, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.664, + "weight": 31.64 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_4", + "count": 52, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.663, + "weight": 31.72 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_4.keys", + "count": 51, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.656, + "weight": 31.1 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_4.values", + "count": 51, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.656, + "weight": 31.16 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_4.ffn_up", + "count": 51, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.656, + "weight": 31.22 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_4.ffn_down", + "count": 51, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.656, + "weight": 31.28 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_5", + "count": 41, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.546, + "weight": 26.69 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_5.q", + "count": 41, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.545, + "weight": 26.78 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.545, + "weight": 26.83 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.545, + "weight": 26.87 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_5", + "count": 41, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.545, + "weight": 26.93 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_5.keys", + "count": 41, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.544, + "weight": 26.98 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_5.values", + "count": 41, + "mean_delta_ms": 1.053, + "std_delta_ms": 0.544, + "weight": 27.03 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_5.ffn_up", + "count": 40, + "mean_delta_ms": 1.034, + "std_delta_ms": 0.53, + "weight": 26.45 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_5.ffn_down", + "count": 40, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.529, + "weight": 26.5 + }, + { + "source": "server.layer_2.v", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.935, + "std_delta_ms": 0.177, + "weight": 26.91 + }, + { + "source": "server.layer_2.v", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.199, + "weight": 24.78 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.205, + "weight": 23.91 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.206, + "weight": 23.92 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.206, + "weight": 23.94 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.208, + "weight": 23.94 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 0.99, + "std_delta_ms": 0.208, + "weight": 23.96 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.212, + "weight": 23.92 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.212, + "weight": 23.93 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.212, + "weight": 23.94 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.213, + "weight": 23.96 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.216, + "weight": 24.55 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.216, + "weight": 24.55 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.217, + "weight": 24.56 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.217, + "weight": 24.58 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.213, + "std_delta_ms": 0.217, + "weight": 24.59 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.218, + "std_delta_ms": 0.218, + "weight": 24.6 + }, + { + "source": "server.layer_2.v", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.223, + "std_delta_ms": 0.218, + "weight": 24.61 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.218, + "weight": 24.62 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.233, + "std_delta_ms": 0.219, + "weight": 24.63 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.377, + "std_delta_ms": 0.123, + "weight": 24.78 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_2.q", + "count": 27, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.124, + "weight": 24.78 + }, + { + "source": "server.layer_2.v", + "target": "server.layer_2.k", + "count": 27, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.125, + "weight": 24.78 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_2.keys", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.702, + "weight": 27.76 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.702, + "weight": 27.87 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.676, + "std_delta_ms": 0.702, + "weight": 27.97 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.692, + "std_delta_ms": 0.704, + "weight": 28.26 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.703, + "weight": 31.52 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.703, + "weight": 31.64 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.703, + "weight": 31.73 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.703, + "weight": 31.83 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.703, + "weight": 31.93 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.703, + "weight": 32.02 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.703, + "weight": 32.1 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.911, + "std_delta_ms": 0.702, + "weight": 32.2 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.917, + "std_delta_ms": 0.702, + "weight": 32.28 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_4", + "count": 52, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.664, + "weight": 31.34 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.664, + "weight": 31.43 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_4.k", + "count": 52, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.664, + "weight": 31.5 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_4.v", + "count": 52, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.664, + "weight": 31.56 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_4", + "count": 52, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.663, + "weight": 31.65 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_4.keys", + "count": 52, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.663, + "weight": 31.72 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_4.values", + "count": 51, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.656, + "weight": 31.09 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_4.ffn_up", + "count": 51, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.656, + "weight": 31.15 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_4.ffn_down", + "count": 51, + "mean_delta_ms": 1.034, + "std_delta_ms": 0.656, + "weight": 31.21 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_5", + "count": 41, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.546, + "weight": 26.63 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_5.q", + "count": 41, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.545, + "weight": 26.72 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.545, + "weight": 26.77 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.545, + "weight": 26.81 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_5", + "count": 41, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.545, + "weight": 26.87 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_5.keys", + "count": 41, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.545, + "weight": 26.92 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_5.values", + "count": 41, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.544, + "weight": 26.97 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_5.ffn_up", + "count": 41, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.544, + "weight": 27.02 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_5.ffn_down", + "count": 40, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.53, + "weight": 26.44 + }, + { + "source": "server.kv_cache_2", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.928, + "std_delta_ms": 0.176, + "weight": 26.89 + }, + { + "source": "server.kv_cache_2", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.199, + "weight": 24.75 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.205, + "weight": 23.89 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.206, + "weight": 23.9 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.206, + "weight": 23.92 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.976, + "std_delta_ms": 0.207, + "weight": 23.92 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.208, + "weight": 23.93 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 0.99, + "std_delta_ms": 0.211, + "weight": 23.89 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.212, + "weight": 23.91 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.212, + "weight": 23.92 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.213, + "weight": 23.93 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.182, + "std_delta_ms": 0.215, + "weight": 24.53 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.189, + "std_delta_ms": 0.216, + "weight": 24.53 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.194, + "std_delta_ms": 0.217, + "weight": 24.55 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.199, + "std_delta_ms": 0.217, + "weight": 24.56 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.217, + "weight": 24.57 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.218, + "weight": 24.58 + }, + { + "source": "server.kv_cache_2", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.217, + "std_delta_ms": 0.218, + "weight": 24.59 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.221, + "std_delta_ms": 0.218, + "weight": 24.6 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.227, + "std_delta_ms": 0.219, + "weight": 24.61 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.123, + "weight": 24.78 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_2.q", + "count": 27, + "mean_delta_ms": 1.378, + "std_delta_ms": 0.124, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_2.k", + "count": 27, + "mean_delta_ms": 1.383, + "std_delta_ms": 0.125, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2", + "target": "server.layer_2.v", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.125, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_2.values", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.702, + "weight": 27.75 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.67, + "std_delta_ms": 0.702, + "weight": 27.85 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.686, + "std_delta_ms": 0.704, + "weight": 28.14 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.863, + "std_delta_ms": 0.703, + "weight": 31.42 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.703, + "weight": 31.55 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.703, + "weight": 31.64 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.703, + "weight": 31.75 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.703, + "weight": 31.85 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.703, + "weight": 31.93 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.9, + "std_delta_ms": 0.703, + "weight": 32.01 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.702, + "weight": 32.11 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.911, + "std_delta_ms": 0.702, + "weight": 32.19 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_4", + "count": 52, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.664, + "weight": 31.27 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.664, + "weight": 31.36 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_4.k", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.664, + "weight": 31.43 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_4.v", + "count": 52, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.664, + "weight": 31.5 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_4", + "count": 52, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.663, + "weight": 31.58 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_4.keys", + "count": 52, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.663, + "weight": 31.65 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_4.values", + "count": 52, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.663, + "weight": 31.71 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_4.ffn_up", + "count": 51, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.656, + "weight": 31.08 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_4.ffn_down", + "count": 51, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.656, + "weight": 31.14 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_5", + "count": 41, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.546, + "weight": 26.58 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_5.q", + "count": 41, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.545, + "weight": 26.66 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.545, + "weight": 26.71 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.545, + "weight": 26.76 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_5", + "count": 41, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.545, + "weight": 26.82 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_5.keys", + "count": 41, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.545, + "weight": 26.87 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_5.values", + "count": 41, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.545, + "weight": 26.92 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_5.ffn_up", + "count": 41, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.544, + "weight": 26.96 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_5.ffn_down", + "count": 41, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.544, + "weight": 27.01 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.922, + "std_delta_ms": 0.176, + "weight": 26.87 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.199, + "weight": 24.73 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.205, + "weight": 23.87 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.205, + "weight": 23.88 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.962, + "std_delta_ms": 0.205, + "weight": 23.9 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.207, + "weight": 23.9 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 0.978, + "std_delta_ms": 0.208, + "weight": 23.91 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.211, + "weight": 23.88 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.211, + "weight": 23.89 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.212, + "weight": 23.9 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.212, + "weight": 23.92 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.215, + "weight": 24.52 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.183, + "std_delta_ms": 0.216, + "weight": 24.52 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.189, + "std_delta_ms": 0.216, + "weight": 24.54 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.194, + "std_delta_ms": 0.216, + "weight": 24.55 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.217, + "weight": 24.56 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.217, + "weight": 24.57 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.211, + "std_delta_ms": 0.218, + "weight": 24.58 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.216, + "std_delta_ms": 0.218, + "weight": 24.59 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.221, + "std_delta_ms": 0.218, + "weight": 24.6 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.365, + "std_delta_ms": 0.122, + "weight": 24.78 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_2.q", + "count": 27, + "mean_delta_ms": 1.372, + "std_delta_ms": 0.123, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_2.k", + "count": 27, + "mean_delta_ms": 1.377, + "std_delta_ms": 0.124, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.layer_2.v", + "count": 27, + "mean_delta_ms": 1.383, + "std_delta_ms": 0.124, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.kv_cache_2", + "count": 27, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.125, + "weight": 24.78 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_2.ffn_up", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.702, + "weight": 27.75 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.681, + "std_delta_ms": 0.704, + "weight": 28.03 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.858, + "std_delta_ms": 0.703, + "weight": 31.34 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.866, + "std_delta_ms": 0.703, + "weight": 31.47 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.702, + "weight": 31.56 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.703, + "weight": 31.67 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.703, + "weight": 31.77 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.703, + "weight": 31.85 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.703, + "weight": 31.93 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.702, + "weight": 32.03 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.702, + "weight": 32.11 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_4", + "count": 52, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.664, + "weight": 31.21 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.664, + "weight": 31.3 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_4.k", + "count": 52, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.664, + "weight": 31.37 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_4.v", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.664, + "weight": 31.43 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_4", + "count": 52, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.663, + "weight": 31.52 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_4.keys", + "count": 52, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.663, + "weight": 31.59 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_4.values", + "count": 52, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.663, + "weight": 31.65 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_4.ffn_up", + "count": 52, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.663, + "weight": 31.72 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_4.ffn_down", + "count": 51, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.656, + "weight": 31.08 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_5", + "count": 42, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.56, + "weight": 27.15 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_5.q", + "count": 41, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.546, + "weight": 26.62 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.545, + "weight": 26.67 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.545, + "weight": 26.71 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_5", + "count": 41, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.545, + "weight": 26.77 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_5.keys", + "count": 41, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.545, + "weight": 26.82 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_5.values", + "count": 41, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.545, + "weight": 26.87 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_5.ffn_up", + "count": 41, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.545, + "weight": 26.92 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_5.ffn_down", + "count": 41, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.544, + "weight": 26.96 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.917, + "std_delta_ms": 0.176, + "weight": 26.85 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.199, + "weight": 24.71 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.204, + "weight": 23.85 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.205, + "weight": 23.86 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.205, + "weight": 23.88 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.207, + "weight": 23.88 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 0.972, + "std_delta_ms": 0.208, + "weight": 23.9 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.211, + "weight": 23.86 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.211, + "weight": 23.87 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.212, + "weight": 23.88 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.212, + "weight": 23.9 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.215, + "weight": 24.51 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.178, + "std_delta_ms": 0.216, + "weight": 24.51 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.183, + "std_delta_ms": 0.216, + "weight": 24.52 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.189, + "std_delta_ms": 0.216, + "weight": 24.54 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.217, + "weight": 24.55 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.217, + "weight": 24.56 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.217, + "weight": 24.57 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.211, + "std_delta_ms": 0.218, + "weight": 24.58 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.216, + "std_delta_ms": 0.218, + "weight": 24.59 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.36, + "std_delta_ms": 0.122, + "weight": 24.78 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_2.q", + "count": 27, + "mean_delta_ms": 1.367, + "std_delta_ms": 0.123, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_2.k", + "count": 27, + "mean_delta_ms": 1.372, + "std_delta_ms": 0.123, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.layer_2.v", + "count": 27, + "mean_delta_ms": 1.378, + "std_delta_ms": 0.124, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_2", + "count": 27, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.124, + "weight": 24.77 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.kv_cache_2.keys", + "count": 27, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.125, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_2.ffn_down", + "count": 57, + "mean_delta_ms": 0.676, + "std_delta_ms": 0.704, + "weight": 27.93 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.853, + "std_delta_ms": 0.702, + "weight": 31.26 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.861, + "std_delta_ms": 0.702, + "weight": 31.39 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.867, + "std_delta_ms": 0.702, + "weight": 31.48 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.873, + "std_delta_ms": 0.703, + "weight": 31.59 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.703, + "weight": 31.69 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.702, + "weight": 31.86 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.702, + "weight": 31.96 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.702, + "weight": 32.04 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_4", + "count": 54, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.679, + "weight": 32.54 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_4.q", + "count": 52, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.664, + "weight": 31.24 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_4.k", + "count": 52, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.664, + "weight": 31.31 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_4.v", + "count": 52, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.664, + "weight": 31.37 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_4", + "count": 52, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.663, + "weight": 31.46 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_4.keys", + "count": 52, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.663, + "weight": 31.53 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_4.values", + "count": 52, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.663, + "weight": 31.59 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_4.ffn_up", + "count": 52, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.663, + "weight": 31.66 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_4.ffn_down", + "count": 52, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.663, + "weight": 31.72 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_5", + "count": 42, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.561, + "weight": 27.1 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_5.q", + "count": 42, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.56, + "weight": 27.19 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_5.k", + "count": 41, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.545, + "weight": 26.62 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_5.v", + "count": 41, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.545, + "weight": 26.67 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_5", + "count": 41, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.545, + "weight": 26.73 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_5.keys", + "count": 41, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.545, + "weight": 26.78 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_5.values", + "count": 41, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.545, + "weight": 26.83 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_5.ffn_up", + "count": 41, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.545, + "weight": 26.87 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_5.ffn_down", + "count": 41, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.545, + "weight": 26.92 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.912, + "std_delta_ms": 0.176, + "weight": 26.83 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.924, + "std_delta_ms": 0.199, + "weight": 24.69 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.204, + "weight": 23.83 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.947, + "std_delta_ms": 0.205, + "weight": 23.84 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.205, + "weight": 23.86 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.207, + "weight": 23.86 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 0.967, + "std_delta_ms": 0.208, + "weight": 23.88 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.211, + "weight": 23.84 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.211, + "weight": 23.85 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.212, + "weight": 23.86 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.212, + "weight": 23.88 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.166, + "std_delta_ms": 0.215, + "weight": 24.49 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.173, + "std_delta_ms": 0.216, + "weight": 24.5 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.179, + "std_delta_ms": 0.216, + "weight": 24.51 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.184, + "std_delta_ms": 0.216, + "weight": 24.52 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.217, + "weight": 24.54 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.196, + "std_delta_ms": 0.217, + "weight": 24.55 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.217, + "weight": 24.56 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.217, + "weight": 24.57 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.211, + "std_delta_ms": 0.218, + "weight": 24.58 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.355, + "std_delta_ms": 0.122, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_2.q", + "count": 27, + "mean_delta_ms": 1.362, + "std_delta_ms": 0.123, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_2.k", + "count": 27, + "mean_delta_ms": 1.367, + "std_delta_ms": 0.123, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.layer_2.v", + "count": 27, + "mean_delta_ms": 1.373, + "std_delta_ms": 0.124, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_2", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.124, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_2.keys", + "count": 27, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.125, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.kv_cache_2.values", + "count": 27, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.125, + "weight": 24.77 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_3", + "count": 57, + "mean_delta_ms": 0.838, + "std_delta_ms": 0.7, + "weight": 31.04 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.845, + "std_delta_ms": 0.7, + "weight": 31.17 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.851, + "std_delta_ms": 0.7, + "weight": 31.27 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.858, + "std_delta_ms": 0.701, + "weight": 31.37 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.864, + "std_delta_ms": 0.701, + "weight": 31.48 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.7, + "weight": 31.56 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.7, + "weight": 31.65 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.7, + "weight": 31.75 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.7, + "weight": 31.83 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_4", + "count": 54, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.676, + "weight": 32.39 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_4.q", + "count": 53, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.668, + "weight": 31.78 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_4.k", + "count": 53, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.668, + "weight": 31.86 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_4.v", + "count": 53, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.668, + "weight": 31.92 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_4", + "count": 53, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.668, + "weight": 32.01 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_4.keys", + "count": 53, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.668, + "weight": 32.08 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_4.values", + "count": 53, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.668, + "weight": 32.15 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_4.ffn_up", + "count": 53, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.668, + "weight": 32.21 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_4.ffn_down", + "count": 53, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.668, + "weight": 32.27 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_5", + "count": 43, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.574, + "weight": 27.6 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_5.q", + "count": 43, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.574, + "weight": 27.69 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_5.k", + "count": 43, + "mean_delta_ms": 1.043, + "std_delta_ms": 0.573, + "weight": 27.74 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_5.v", + "count": 42, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.561, + "weight": 27.15 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_5", + "count": 42, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.56, + "weight": 27.21 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_5.keys", + "count": 42, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.56, + "weight": 27.26 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_5.values", + "count": 42, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.56, + "weight": 27.31 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_5.ffn_up", + "count": 42, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.56, + "weight": 27.36 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_5.ffn_down", + "count": 42, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.56, + "weight": 27.4 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.172, + "weight": 26.85 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.buffer.logits", + "count": 30, + "mean_delta_ms": 0.909, + "std_delta_ms": 0.196, + "weight": 24.69 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_0", + "count": 29, + "mean_delta_ms": 0.926, + "std_delta_ms": 0.202, + "weight": 23.82 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_0.q", + "count": 29, + "mean_delta_ms": 0.932, + "std_delta_ms": 0.202, + "weight": 23.83 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_0.k", + "count": 29, + "mean_delta_ms": 0.937, + "std_delta_ms": 0.203, + "weight": 23.84 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_0.v", + "count": 29, + "mean_delta_ms": 0.945, + "std_delta_ms": 0.204, + "weight": 23.85 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_0", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.205, + "weight": 23.86 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_0.keys", + "count": 29, + "mean_delta_ms": 0.958, + "std_delta_ms": 0.208, + "weight": 23.82 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_0.values", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.209, + "weight": 23.83 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_0.ffn_up", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.209, + "weight": 23.84 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_0.ffn_down", + "count": 29, + "mean_delta_ms": 0.973, + "std_delta_ms": 0.21, + "weight": 23.86 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 1.151, + "std_delta_ms": 0.213, + "weight": 24.47 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 1.158, + "std_delta_ms": 0.214, + "weight": 24.48 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 1.163, + "std_delta_ms": 0.214, + "weight": 24.49 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 1.168, + "std_delta_ms": 0.214, + "weight": 24.5 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.215, + "weight": 24.52 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.18, + "std_delta_ms": 0.215, + "weight": 24.53 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.215, + "weight": 24.54 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.216, + "weight": 24.55 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.216, + "weight": 24.56 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_2", + "count": 27, + "mean_delta_ms": 1.339, + "std_delta_ms": 0.116, + "weight": 24.85 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_2.q", + "count": 27, + "mean_delta_ms": 1.345, + "std_delta_ms": 0.117, + "weight": 24.85 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_2.k", + "count": 27, + "mean_delta_ms": 1.351, + "std_delta_ms": 0.117, + "weight": 24.85 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_2.v", + "count": 27, + "mean_delta_ms": 1.356, + "std_delta_ms": 0.118, + "weight": 24.84 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_2", + "count": 27, + "mean_delta_ms": 1.363, + "std_delta_ms": 0.118, + "weight": 24.84 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_2.keys", + "count": 27, + "mean_delta_ms": 1.368, + "std_delta_ms": 0.119, + "weight": 24.84 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.kv_cache_2.values", + "count": 27, + "mean_delta_ms": 1.373, + "std_delta_ms": 0.119, + "weight": 24.84 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.layer_2.ffn_up", + "count": 27, + "mean_delta_ms": 1.378, + "std_delta_ms": 0.119, + "weight": 24.85 + }, + { + "source": "server.layer_3", + "target": "server.layer_3.q", + "count": 57, + "mean_delta_ms": 0.667, + "std_delta_ms": 0.7, + "weight": 27.81 + }, + { + "source": "server.layer_3", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.673, + "std_delta_ms": 0.7, + "weight": 27.93 + }, + { + "source": "server.layer_3", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.679, + "std_delta_ms": 0.7, + "weight": 28.07 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.685, + "std_delta_ms": 0.7, + "weight": 28.2 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.7, + "weight": 28.31 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.696, + "std_delta_ms": 0.7, + "weight": 28.42 + }, + { + "source": "server.layer_3", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.702, + "std_delta_ms": 0.699, + "weight": 28.54 + }, + { + "source": "server.layer_3", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.707, + "std_delta_ms": 0.699, + "weight": 28.65 + }, + { + "source": "server.layer_3", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.703, + "weight": 31.9 + }, + { + "source": "server.layer_3", + "target": "server.layer_4.q", + "count": 56, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.693, + "weight": 31.33 + }, + { + "source": "server.layer_3", + "target": "server.layer_4.k", + "count": 56, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.693, + "weight": 31.42 + }, + { + "source": "server.layer_3", + "target": "server.layer_4.v", + "count": 55, + "mean_delta_ms": 0.87, + "std_delta_ms": 0.682, + "weight": 30.83 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_4", + "count": 55, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.682, + "weight": 30.94 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_4.keys", + "count": 55, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.682, + "weight": 31.03 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_4.values", + "count": 55, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.682, + "weight": 31.11 + }, + { + "source": "server.layer_3", + "target": "server.layer_4.ffn_up", + "count": 55, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.682, + "weight": 31.19 + }, + { + "source": "server.layer_3", + "target": "server.layer_4.ffn_down", + "count": 55, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.682, + "weight": 31.26 + }, + { + "source": "server.layer_3", + "target": "server.layer_5", + "count": 51, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.654, + "weight": 31.05 + }, + { + "source": "server.layer_3", + "target": "server.layer_5.q", + "count": 51, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.654, + "weight": 31.15 + }, + { + "source": "server.layer_3", + "target": "server.layer_5.k", + "count": 51, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.654, + "weight": 31.22 + }, + { + "source": "server.layer_3", + "target": "server.layer_5.v", + "count": 50, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.646, + "weight": 30.58 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_5", + "count": 49, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.637, + "weight": 29.98 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_5.keys", + "count": 49, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.637, + "weight": 30.04 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_5.values", + "count": 49, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.636, + "weight": 30.1 + }, + { + "source": "server.layer_3", + "target": "server.layer_5.ffn_up", + "count": 49, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.636, + "weight": 30.16 + }, + { + "source": "server.layer_3", + "target": "server.layer_5.ffn_down", + "count": 49, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.636, + "weight": 30.22 + }, + { + "source": "server.layer_3", + "target": "server.buffer", + "count": 44, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.556, + "weight": 28.75 + }, + { + "source": "server.layer_3", + "target": "server.buffer.logits", + "count": 42, + "mean_delta_ms": 1.074, + "std_delta_ms": 0.567, + "weight": 27.48 + }, + { + "source": "server.layer_3", + "target": "server.layer_0", + "count": 39, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.548, + "weight": 25.65 + }, + { + "source": "server.layer_3", + "target": "server.layer_0.q", + "count": 38, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.533, + "weight": 25.07 + }, + { + "source": "server.layer_3", + "target": "server.layer_0.k", + "count": 38, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.533, + "weight": 25.11 + }, + { + "source": "server.layer_3", + "target": "server.layer_0.v", + "count": 37, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.515, + "weight": 24.59 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_0", + "count": 37, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.515, + "weight": 24.64 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_0.keys", + "count": 36, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.496, + "weight": 24.11 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_0.values", + "count": 36, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.496, + "weight": 24.14 + }, + { + "source": "server.layer_3", + "target": "server.layer_0.ffn_up", + "count": 36, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.496, + "weight": 24.18 + }, + { + "source": "server.layer_3", + "target": "server.layer_0.ffn_down", + "count": 35, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.474, + "weight": 23.68 + }, + { + "source": "server.layer_3", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.972, + "std_delta_ms": 0.211, + "weight": 23.83 + }, + { + "source": "server.layer_3", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.212, + "weight": 23.84 + }, + { + "source": "server.layer_3", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.212, + "weight": 23.86 + }, + { + "source": "server.layer_3", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.212, + "weight": 23.88 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.213, + "weight": 23.9 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.213, + "weight": 23.91 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.213, + "weight": 23.93 + }, + { + "source": "server.layer_3", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.214, + "weight": 23.94 + }, + { + "source": "server.layer_3", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.214, + "weight": 23.96 + }, + { + "source": "server.layer_3", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.216, + "std_delta_ms": 0.231, + "weight": 24.36 + }, + { + "source": "server.layer_3", + "target": "server.layer_2.q", + "count": 28, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.183, + "weight": 24.27 + }, + { + "source": "server.layer_3", + "target": "server.layer_2.k", + "count": 28, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.184, + "weight": 24.28 + }, + { + "source": "server.layer_3", + "target": "server.layer_2.v", + "count": 28, + "mean_delta_ms": 1.206, + "std_delta_ms": 0.184, + "weight": 24.29 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_2", + "count": 28, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.184, + "weight": 24.3 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_2.keys", + "count": 28, + "mean_delta_ms": 1.218, + "std_delta_ms": 0.185, + "weight": 24.31 + }, + { + "source": "server.layer_3", + "target": "server.kv_cache_2.values", + "count": 28, + "mean_delta_ms": 1.223, + "std_delta_ms": 0.185, + "weight": 24.32 + }, + { + "source": "server.layer_3", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.185, + "weight": 24.34 + }, + { + "source": "server.layer_3", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.244, + "std_delta_ms": 0.188, + "weight": 24.33 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_3.k", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.7, + "weight": 27.77 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.7, + "weight": 27.91 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.677, + "std_delta_ms": 0.7, + "weight": 28.04 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.7, + "weight": 28.15 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.688, + "std_delta_ms": 0.7, + "weight": 28.26 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.694, + "std_delta_ms": 0.699, + "weight": 28.39 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.699, + "std_delta_ms": 0.699, + "weight": 28.49 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.78 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_4.q", + "count": 56, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.693, + "weight": 31.21 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_4.k", + "count": 56, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.692, + "weight": 31.3 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_4.v", + "count": 56, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.692, + "weight": 31.38 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_4", + "count": 55, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.682, + "weight": 30.82 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_4.keys", + "count": 55, + "mean_delta_ms": 0.875, + "std_delta_ms": 0.682, + "weight": 30.91 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_4.values", + "count": 55, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.682, + "weight": 30.99 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_4.ffn_up", + "count": 55, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.682, + "weight": 31.07 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_4.ffn_down", + "count": 55, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.682, + "weight": 31.15 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_5", + "count": 51, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.654, + "weight": 30.95 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_5.q", + "count": 51, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.654, + "weight": 31.06 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_5.k", + "count": 51, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.654, + "weight": 31.12 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_5.v", + "count": 51, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.654, + "weight": 31.18 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_5", + "count": 50, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.646, + "weight": 30.56 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_5.keys", + "count": 50, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.646, + "weight": 30.63 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_5.values", + "count": 49, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.637, + "weight": 30.01 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_5.ffn_up", + "count": 49, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.637, + "weight": 30.07 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_5.ffn_down", + "count": 49, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.637, + "weight": 30.12 + }, + { + "source": "server.layer_3.q", + "target": "server.buffer", + "count": 45, + "mean_delta_ms": 1.062, + "std_delta_ms": 0.568, + "weight": 29.32 + }, + { + "source": "server.layer_3.q", + "target": "server.buffer.logits", + "count": 42, + "mean_delta_ms": 1.066, + "std_delta_ms": 0.568, + "weight": 27.41 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_0", + "count": 39, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.548, + "weight": 25.58 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_0.q", + "count": 39, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.548, + "weight": 25.63 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_0.k", + "count": 39, + "mean_delta_ms": 1.055, + "std_delta_ms": 0.548, + "weight": 25.67 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_0.v", + "count": 38, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.533, + "weight": 25.12 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_0", + "count": 37, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.515, + "weight": 24.57 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_0.keys", + "count": 37, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.516, + "weight": 24.61 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_0.values", + "count": 36, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.497, + "weight": 24.07 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_0.ffn_up", + "count": 36, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.497, + "weight": 24.11 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_0.ffn_down", + "count": 36, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.497, + "weight": 24.15 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.211, + "weight": 23.8 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.212, + "weight": 23.81 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.976, + "std_delta_ms": 0.212, + "weight": 23.82 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.212, + "weight": 23.84 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.213, + "weight": 23.86 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.213, + "weight": 23.88 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.213, + "weight": 23.9 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.214, + "weight": 23.91 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.214, + "weight": 23.93 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.231, + "weight": 24.35 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.232, + "weight": 24.35 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_2.k", + "count": 28, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.183, + "weight": 24.28 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_2.v", + "count": 28, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.183, + "weight": 24.29 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_2", + "count": 28, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.183, + "weight": 24.3 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_2.keys", + "count": 28, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.184, + "weight": 24.31 + }, + { + "source": "server.layer_3.q", + "target": "server.kv_cache_2.values", + "count": 28, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.184, + "weight": 24.32 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.184, + "weight": 24.33 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.236, + "std_delta_ms": 0.187, + "weight": 24.33 + }, + { + "source": "server.layer_3.q", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.383, + "std_delta_ms": 0.122, + "weight": 24.81 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_3.v", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.7, + "weight": 27.78 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.672, + "std_delta_ms": 0.7, + "weight": 27.92 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.677, + "std_delta_ms": 0.7, + "weight": 28.03 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.682, + "std_delta_ms": 0.7, + "weight": 28.14 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.688, + "std_delta_ms": 0.699, + "weight": 28.27 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.693, + "std_delta_ms": 0.699, + "weight": 28.38 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.703, + "weight": 31.69 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.703, + "weight": 31.8 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_4.k", + "count": 56, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.693, + "weight": 31.21 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_4.v", + "count": 56, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.693, + "weight": 31.29 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_4", + "count": 56, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.692, + "weight": 31.4 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_4.keys", + "count": 55, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.682, + "weight": 30.82 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_4.values", + "count": 55, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.682, + "weight": 30.9 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_4.ffn_up", + "count": 55, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.682, + "weight": 30.98 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_4.ffn_down", + "count": 55, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.682, + "weight": 31.06 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_5", + "count": 51, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.654, + "weight": 30.88 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_5.q", + "count": 51, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.654, + "weight": 30.99 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_5.k", + "count": 51, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.654, + "weight": 31.05 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_5.v", + "count": 51, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.654, + "weight": 31.11 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_5", + "count": 51, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.654, + "weight": 31.18 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_5.keys", + "count": 50, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.646, + "weight": 30.56 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_5.values", + "count": 50, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.646, + "weight": 30.62 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_5.ffn_up", + "count": 49, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.637, + "weight": 30.0 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_5.ffn_down", + "count": 49, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.637, + "weight": 30.06 + }, + { + "source": "server.layer_3.k", + "target": "server.buffer", + "count": 46, + "mean_delta_ms": 1.077, + "std_delta_ms": 0.579, + "weight": 29.92 + }, + { + "source": "server.layer_3.k", + "target": "server.buffer.logits", + "count": 42, + "mean_delta_ms": 1.061, + "std_delta_ms": 0.568, + "weight": 27.35 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_0", + "count": 39, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.548, + "weight": 25.53 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_0.q", + "count": 39, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.548, + "weight": 25.58 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_0.k", + "count": 39, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.548, + "weight": 25.62 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_0.v", + "count": 39, + "mean_delta_ms": 1.058, + "std_delta_ms": 0.547, + "weight": 25.7 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_0", + "count": 38, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.533, + "weight": 25.13 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_0.keys", + "count": 37, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.516, + "weight": 24.57 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_0.values", + "count": 37, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.516, + "weight": 24.61 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_0.ffn_up", + "count": 36, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.497, + "weight": 24.07 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_0.ffn_down", + "count": 36, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.497, + "weight": 24.11 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.211, + "weight": 23.78 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.212, + "weight": 23.79 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.212, + "weight": 23.81 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.976, + "std_delta_ms": 0.212, + "weight": 23.82 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.213, + "weight": 23.84 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.213, + "weight": 23.86 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.213, + "weight": 23.88 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.213, + "weight": 23.89 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.214, + "weight": 23.91 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.231, + "weight": 24.34 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.232, + "weight": 24.34 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.232, + "weight": 24.35 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_2.v", + "count": 28, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.183, + "weight": 24.28 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_2", + "count": 28, + "mean_delta_ms": 1.199, + "std_delta_ms": 0.183, + "weight": 24.29 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_2.keys", + "count": 28, + "mean_delta_ms": 1.205, + "std_delta_ms": 0.183, + "weight": 24.3 + }, + { + "source": "server.layer_3.k", + "target": "server.kv_cache_2.values", + "count": 28, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.184, + "weight": 24.31 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.184, + "weight": 24.32 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.23, + "std_delta_ms": 0.186, + "weight": 24.32 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.378, + "std_delta_ms": 0.122, + "weight": 24.81 + }, + { + "source": "server.layer_3.k", + "target": "server.layer_3.q", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.122, + "weight": 24.81 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_3", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.7, + "weight": 27.78 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.7, + "weight": 27.9 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.676, + "std_delta_ms": 0.7, + "weight": 28.01 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.681, + "std_delta_ms": 0.699, + "weight": 28.14 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.687, + "std_delta_ms": 0.699, + "weight": 28.25 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.873, + "std_delta_ms": 0.702, + "weight": 31.59 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.702, + "weight": 31.7 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.702, + "weight": 31.79 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.702, + "weight": 31.87 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.702, + "weight": 31.98 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_4.keys", + "count": 56, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.693, + "weight": 31.39 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_4.values", + "count": 55, + "mean_delta_ms": 0.868, + "std_delta_ms": 0.682, + "weight": 30.8 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_4.ffn_up", + "count": 55, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.682, + "weight": 30.88 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_4.ffn_down", + "count": 55, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.682, + "weight": 30.96 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_5", + "count": 52, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.663, + "weight": 31.49 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_5.q", + "count": 51, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.655, + "weight": 30.9 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_5.k", + "count": 51, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.655, + "weight": 30.97 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_5.v", + "count": 51, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.655, + "weight": 31.03 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_5", + "count": 51, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.655, + "weight": 31.1 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_5.keys", + "count": 51, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.655, + "weight": 31.17 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_5.values", + "count": 50, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.646, + "weight": 30.54 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_5.ffn_up", + "count": 50, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.646, + "weight": 30.6 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_5.ffn_down", + "count": 49, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.637, + "weight": 29.98 + }, + { + "source": "server.layer_3.v", + "target": "server.buffer", + "count": 46, + "mean_delta_ms": 1.071, + "std_delta_ms": 0.579, + "weight": 29.85 + }, + { + "source": "server.layer_3.v", + "target": "server.buffer.logits", + "count": 43, + "mean_delta_ms": 1.077, + "std_delta_ms": 0.58, + "weight": 27.95 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_0", + "count": 39, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.549, + "weight": 25.47 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_0.q", + "count": 39, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.549, + "weight": 25.52 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_0.k", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.549, + "weight": 25.56 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_0.v", + "count": 39, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.548, + "weight": 25.64 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_0", + "count": 39, + "mean_delta_ms": 1.058, + "std_delta_ms": 0.548, + "weight": 25.69 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_0.keys", + "count": 38, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.534, + "weight": 25.11 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_0.values", + "count": 37, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.517, + "weight": 24.54 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_0.ffn_up", + "count": 37, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.517, + "weight": 24.58 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_0.ffn_down", + "count": 36, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.498, + "weight": 24.05 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.211, + "weight": 23.74 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.212, + "weight": 23.75 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.212, + "weight": 23.77 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.212, + "weight": 23.79 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.977, + "std_delta_ms": 0.213, + "weight": 23.81 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.213, + "weight": 23.83 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.987, + "std_delta_ms": 0.213, + "weight": 23.84 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.214, + "weight": 23.86 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.214, + "weight": 23.87 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.196, + "std_delta_ms": 0.23, + "weight": 24.33 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.231, + "weight": 24.33 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.231, + "weight": 24.34 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 1.214, + "std_delta_ms": 0.232, + "weight": 24.36 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_2", + "count": 28, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.182, + "weight": 24.3 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_2.keys", + "count": 28, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.182, + "weight": 24.31 + }, + { + "source": "server.layer_3.v", + "target": "server.kv_cache_2.values", + "count": 28, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.182, + "weight": 24.32 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.182, + "weight": 24.33 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.224, + "std_delta_ms": 0.185, + "weight": 24.32 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.119, + "weight": 24.84 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_3.q", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.12, + "weight": 24.84 + }, + { + "source": "server.layer_3.v", + "target": "server.layer_3.k", + "count": 27, + "mean_delta_ms": 1.385, + "std_delta_ms": 0.12, + "weight": 24.85 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_3.keys", + "count": 57, + "mean_delta_ms": 0.664, + "std_delta_ms": 0.7, + "weight": 27.76 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.669, + "std_delta_ms": 0.7, + "weight": 27.87 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.675, + "std_delta_ms": 0.699, + "weight": 28.0 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.68, + "std_delta_ms": 0.699, + "weight": 28.11 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.866, + "std_delta_ms": 0.702, + "weight": 31.49 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.6 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.69 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.702, + "weight": 31.77 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.702, + "weight": 31.88 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.702, + "weight": 31.97 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_4.values", + "count": 56, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.693, + "weight": 31.37 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_4.ffn_up", + "count": 56, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.693, + "weight": 31.45 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_4.ffn_down", + "count": 55, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.682, + "weight": 30.86 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_5", + "count": 52, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.663, + "weight": 31.41 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_5.q", + "count": 52, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.663, + "weight": 31.52 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_5.k", + "count": 52, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.662, + "weight": 31.58 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_5.v", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.655, + "weight": 30.95 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_5", + "count": 51, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.655, + "weight": 31.03 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_5.keys", + "count": 51, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.655, + "weight": 31.09 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_5.values", + "count": 51, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.654, + "weight": 31.16 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_5.ffn_up", + "count": 51, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.654, + "weight": 31.21 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_5.ffn_down", + "count": 50, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.646, + "weight": 30.58 + }, + { + "source": "server.kv_cache_3", + "target": "server.buffer", + "count": 46, + "mean_delta_ms": 1.064, + "std_delta_ms": 0.579, + "weight": 29.79 + }, + { + "source": "server.kv_cache_3", + "target": "server.buffer.logits", + "count": 43, + "mean_delta_ms": 1.07, + "std_delta_ms": 0.58, + "weight": 27.89 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_0", + "count": 40, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.563, + "weight": 26.05 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_0.q", + "count": 40, + "mean_delta_ms": 1.057, + "std_delta_ms": 0.563, + "weight": 26.1 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_0.k", + "count": 39, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.549, + "weight": 25.5 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_0.v", + "count": 39, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.548, + "weight": 25.58 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_0", + "count": 39, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.548, + "weight": 25.64 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_0.keys", + "count": 39, + "mean_delta_ms": 1.058, + "std_delta_ms": 0.548, + "weight": 25.68 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_0.values", + "count": 38, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.534, + "weight": 25.09 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_0.ffn_up", + "count": 37, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.517, + "weight": 24.53 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_0.ffn_down", + "count": 37, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.517, + "weight": 24.57 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.211, + "weight": 23.71 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.212, + "weight": 23.73 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.958, + "std_delta_ms": 0.212, + "weight": 23.74 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.212, + "weight": 23.76 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.213, + "weight": 23.78 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.213, + "weight": 23.8 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.213, + "weight": 23.82 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.214, + "weight": 23.83 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 0.99, + "std_delta_ms": 0.214, + "weight": 23.85 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.23, + "weight": 24.31 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.231, + "weight": 24.31 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.231, + "weight": 24.32 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.231, + "weight": 24.34 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 1.214, + "std_delta_ms": 0.232, + "weight": 24.35 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_2.keys", + "count": 28, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.182, + "weight": 24.29 + }, + { + "source": "server.kv_cache_3", + "target": "server.kv_cache_2.values", + "count": 28, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.182, + "weight": 24.3 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.182, + "weight": 24.32 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.217, + "std_delta_ms": 0.185, + "weight": 24.31 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.365, + "std_delta_ms": 0.119, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_3.q", + "count": 27, + "mean_delta_ms": 1.373, + "std_delta_ms": 0.12, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_3.k", + "count": 27, + "mean_delta_ms": 1.378, + "std_delta_ms": 0.12, + "weight": 24.84 + }, + { + "source": "server.kv_cache_3", + "target": "server.layer_3.v", + "count": 27, + "mean_delta_ms": 1.385, + "std_delta_ms": 0.121, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_3.values", + "count": 57, + "mean_delta_ms": 0.664, + "std_delta_ms": 0.699, + "weight": 27.76 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.67, + "std_delta_ms": 0.699, + "weight": 27.89 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.675, + "std_delta_ms": 0.699, + "weight": 28.0 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.861, + "std_delta_ms": 0.702, + "weight": 31.4 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.868, + "std_delta_ms": 0.702, + "weight": 31.51 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.6 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.702, + "weight": 31.68 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.79 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.702, + "weight": 31.88 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.702, + "weight": 31.97 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_4.ffn_up", + "count": 56, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.693, + "weight": 31.37 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_4.ffn_down", + "count": 56, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.693, + "weight": 31.45 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_5", + "count": 52, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.663, + "weight": 31.35 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_5.q", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.662, + "weight": 31.45 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_5.k", + "count": 52, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.662, + "weight": 31.52 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_5.v", + "count": 52, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.662, + "weight": 31.58 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_5", + "count": 51, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.654, + "weight": 30.96 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_5.keys", + "count": 51, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.654, + "weight": 31.03 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_5.values", + "count": 51, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.654, + "weight": 31.09 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_5.ffn_up", + "count": 51, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.654, + "weight": 31.15 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_5.ffn_down", + "count": 51, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.654, + "weight": 31.21 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.buffer", + "count": 46, + "mean_delta_ms": 1.059, + "std_delta_ms": 0.58, + "weight": 29.73 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.buffer.logits", + "count": 43, + "mean_delta_ms": 1.065, + "std_delta_ms": 0.58, + "weight": 27.84 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_0", + "count": 41, + "mean_delta_ms": 1.069, + "std_delta_ms": 0.575, + "weight": 26.66 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_0.q", + "count": 40, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.563, + "weight": 26.05 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_0.k", + "count": 39, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.549, + "weight": 25.46 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_0.v", + "count": 39, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.548, + "weight": 25.53 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_0", + "count": 39, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.548, + "weight": 25.59 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_0.keys", + "count": 39, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.549, + "weight": 25.64 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_0.values", + "count": 39, + "mean_delta_ms": 1.057, + "std_delta_ms": 0.549, + "weight": 25.68 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_0.ffn_up", + "count": 38, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.534, + "weight": 25.09 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_0.ffn_down", + "count": 37, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.517, + "weight": 24.52 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.211, + "weight": 23.69 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.947, + "std_delta_ms": 0.212, + "weight": 23.71 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.212, + "weight": 23.72 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.958, + "std_delta_ms": 0.212, + "weight": 23.74 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.213, + "weight": 23.76 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.213, + "weight": 23.78 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.213, + "weight": 23.8 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.213, + "weight": 23.81 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.214, + "weight": 23.83 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.184, + "std_delta_ms": 0.229, + "weight": 24.29 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.231, + "weight": 24.3 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.231, + "weight": 24.31 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.231, + "weight": 24.32 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.232, + "weight": 24.34 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 1.214, + "std_delta_ms": 0.232, + "weight": 24.35 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_2.values", + "count": 28, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.182, + "weight": 24.29 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.196, + "std_delta_ms": 0.182, + "weight": 24.31 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.185, + "weight": 24.3 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.359, + "std_delta_ms": 0.119, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_3.q", + "count": 27, + "mean_delta_ms": 1.367, + "std_delta_ms": 0.119, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_3.k", + "count": 27, + "mean_delta_ms": 1.373, + "std_delta_ms": 0.119, + "weight": 24.84 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.layer_3.v", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.121, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.kv_cache_3", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.121, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_3.ffn_up", + "count": 57, + "mean_delta_ms": 0.664, + "std_delta_ms": 0.699, + "weight": 27.78 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.67, + "std_delta_ms": 0.699, + "weight": 27.89 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.856, + "std_delta_ms": 0.702, + "weight": 31.32 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.863, + "std_delta_ms": 0.702, + "weight": 31.43 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.702, + "weight": 31.52 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.6 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.702, + "weight": 31.71 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.702, + "weight": 31.8 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.702, + "weight": 31.89 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.702, + "weight": 31.97 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.902, + "std_delta_ms": 0.702, + "weight": 32.05 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_5", + "count": 52, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.663, + "weight": 31.28 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_5.q", + "count": 52, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.662, + "weight": 31.39 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_5.k", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.662, + "weight": 31.46 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_5.v", + "count": 52, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.662, + "weight": 31.52 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_5", + "count": 52, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.662, + "weight": 31.59 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_5.keys", + "count": 51, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.654, + "weight": 30.97 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_5.values", + "count": 51, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.654, + "weight": 31.03 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_5.ffn_up", + "count": 51, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.654, + "weight": 31.09 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_5.ffn_down", + "count": 51, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.654, + "weight": 31.15 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.buffer", + "count": 47, + "mean_delta_ms": 1.074, + "std_delta_ms": 0.589, + "weight": 30.35 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.buffer.logits", + "count": 43, + "mean_delta_ms": 1.06, + "std_delta_ms": 0.58, + "weight": 27.79 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_0", + "count": 41, + "mean_delta_ms": 1.064, + "std_delta_ms": 0.575, + "weight": 26.61 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_0.q", + "count": 41, + "mean_delta_ms": 1.07, + "std_delta_ms": 0.575, + "weight": 26.66 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_0.k", + "count": 40, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.563, + "weight": 26.05 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_0.v", + "count": 39, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.549, + "weight": 25.49 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_0", + "count": 39, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.548, + "weight": 25.55 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_0.keys", + "count": 39, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.549, + "weight": 25.59 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_0.values", + "count": 39, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.549, + "weight": 25.63 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_0.ffn_up", + "count": 39, + "mean_delta_ms": 1.057, + "std_delta_ms": 0.549, + "weight": 25.67 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_0.ffn_down", + "count": 38, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.534, + "weight": 25.08 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.935, + "std_delta_ms": 0.211, + "weight": 23.67 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.942, + "std_delta_ms": 0.212, + "weight": 23.68 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.212, + "weight": 23.7 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.212, + "weight": 23.72 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.96, + "std_delta_ms": 0.213, + "weight": 23.74 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.213, + "weight": 23.76 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.213, + "weight": 23.78 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.213, + "weight": 23.79 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.214, + "weight": 23.81 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.179, + "std_delta_ms": 0.229, + "weight": 24.28 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.23, + "weight": 24.28 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.231, + "weight": 24.29 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.231, + "weight": 24.31 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.232, + "weight": 24.32 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.232, + "weight": 24.33 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_2.values", + "count": 28, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.182, + "weight": 24.28 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.182, + "weight": 24.3 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.207, + "std_delta_ms": 0.185, + "weight": 24.29 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.354, + "std_delta_ms": 0.118, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_3.q", + "count": 27, + "mean_delta_ms": 1.362, + "std_delta_ms": 0.119, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_3.k", + "count": 27, + "mean_delta_ms": 1.368, + "std_delta_ms": 0.119, + "weight": 24.84 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.layer_3.v", + "count": 27, + "mean_delta_ms": 1.374, + "std_delta_ms": 0.12, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_3", + "count": 27, + "mean_delta_ms": 1.381, + "std_delta_ms": 0.121, + "weight": 24.83 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.kv_cache_3.keys", + "count": 27, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.121, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_3.ffn_down", + "count": 57, + "mean_delta_ms": 0.663, + "std_delta_ms": 0.699, + "weight": 27.76 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.85, + "std_delta_ms": 0.702, + "weight": 31.21 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.857, + "std_delta_ms": 0.702, + "weight": 31.33 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.862, + "std_delta_ms": 0.702, + "weight": 31.42 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.867, + "std_delta_ms": 0.702, + "weight": 31.5 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.61 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.702, + "weight": 31.7 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.79 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.702, + "weight": 31.87 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.702, + "weight": 31.95 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_5", + "count": 53, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.67, + "weight": 31.9 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_5.q", + "count": 52, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.662, + "weight": 31.32 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_5.k", + "count": 52, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.662, + "weight": 31.39 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_5.v", + "count": 52, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.662, + "weight": 31.45 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_5", + "count": 52, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.662, + "weight": 31.52 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_5.keys", + "count": 52, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.662, + "weight": 31.59 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_5.values", + "count": 51, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.654, + "weight": 30.96 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_5.ffn_up", + "count": 51, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.654, + "weight": 31.02 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_5.ffn_down", + "count": 51, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.654, + "weight": 31.08 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.buffer", + "count": 48, + "mean_delta_ms": 1.087, + "std_delta_ms": 0.598, + "weight": 30.96 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.buffer.logits", + "count": 45, + "mean_delta_ms": 1.096, + "std_delta_ms": 0.6, + "weight": 29.08 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_0", + "count": 42, + "mean_delta_ms": 1.081, + "std_delta_ms": 0.587, + "weight": 27.22 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_0.q", + "count": 41, + "mean_delta_ms": 1.064, + "std_delta_ms": 0.576, + "weight": 26.6 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_0.k", + "count": 41, + "mean_delta_ms": 1.069, + "std_delta_ms": 0.576, + "weight": 26.64 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_0.v", + "count": 40, + "mean_delta_ms": 1.053, + "std_delta_ms": 0.563, + "weight": 26.07 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_0", + "count": 39, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.549, + "weight": 25.49 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_0.keys", + "count": 39, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.549, + "weight": 25.53 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_0.values", + "count": 39, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.549, + "weight": 25.57 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_0.ffn_up", + "count": 39, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.549, + "weight": 25.61 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_0.ffn_down", + "count": 39, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.549, + "weight": 25.65 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.21, + "weight": 23.65 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.936, + "std_delta_ms": 0.211, + "weight": 23.66 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.211, + "weight": 23.68 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.212, + "weight": 23.7 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.212, + "weight": 23.72 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.212, + "weight": 23.74 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.212, + "weight": 23.76 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.213, + "weight": 23.78 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 0.973, + "std_delta_ms": 0.213, + "weight": 23.79 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.173, + "std_delta_ms": 0.229, + "weight": 24.27 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.18, + "std_delta_ms": 0.23, + "weight": 24.27 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.23, + "weight": 24.28 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.231, + "weight": 24.29 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.231, + "weight": 24.31 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.232, + "weight": 24.32 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.232, + "weight": 24.33 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_2.ffn_up", + "count": 28, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.181, + "weight": 24.29 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.2, + "std_delta_ms": 0.184, + "weight": 24.28 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.348, + "std_delta_ms": 0.117, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_3.q", + "count": 27, + "mean_delta_ms": 1.356, + "std_delta_ms": 0.118, + "weight": 24.83 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_3.k", + "count": 27, + "mean_delta_ms": 1.361, + "std_delta_ms": 0.118, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.layer_3.v", + "count": 27, + "mean_delta_ms": 1.368, + "std_delta_ms": 0.119, + "weight": 24.83 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_3", + "count": 27, + "mean_delta_ms": 1.374, + "std_delta_ms": 0.12, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_3.keys", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.12, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.kv_cache_3.values", + "count": 27, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.12, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_4", + "count": 57, + "mean_delta_ms": 0.844, + "std_delta_ms": 0.702, + "weight": 31.13 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.851, + "std_delta_ms": 0.702, + "weight": 31.24 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.857, + "std_delta_ms": 0.702, + "weight": 31.34 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.862, + "std_delta_ms": 0.702, + "weight": 31.42 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.702, + "weight": 31.53 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.702, + "weight": 31.62 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.702, + "weight": 31.71 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.702, + "weight": 31.79 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.702, + "weight": 31.87 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_5", + "count": 53, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.67, + "weight": 31.84 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_5.q", + "count": 53, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.67, + "weight": 31.95 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_5.k", + "count": 52, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.662, + "weight": 31.32 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_5.v", + "count": 52, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.662, + "weight": 31.38 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_5", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.662, + "weight": 31.46 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_5.keys", + "count": 52, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.662, + "weight": 31.53 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_5.values", + "count": 51, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.654, + "weight": 30.9 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_5.ffn_up", + "count": 51, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.654, + "weight": 30.96 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_5.ffn_down", + "count": 51, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.654, + "weight": 31.02 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.buffer", + "count": 48, + "mean_delta_ms": 1.082, + "std_delta_ms": 0.598, + "weight": 30.91 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.buffer.logits", + "count": 45, + "mean_delta_ms": 1.091, + "std_delta_ms": 0.6, + "weight": 29.03 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_0", + "count": 43, + "mean_delta_ms": 1.097, + "std_delta_ms": 0.596, + "weight": 27.86 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_0.q", + "count": 42, + "mean_delta_ms": 1.081, + "std_delta_ms": 0.587, + "weight": 27.23 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_0.k", + "count": 42, + "mean_delta_ms": 1.086, + "std_delta_ms": 0.587, + "weight": 27.27 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_0.v", + "count": 41, + "mean_delta_ms": 1.071, + "std_delta_ms": 0.575, + "weight": 26.68 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_0", + "count": 40, + "mean_delta_ms": 1.055, + "std_delta_ms": 0.563, + "weight": 26.08 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_0.keys", + "count": 39, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.549, + "weight": 25.49 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_0.values", + "count": 39, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.549, + "weight": 25.53 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_0.ffn_up", + "count": 39, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.549, + "weight": 25.57 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_0.ffn_down", + "count": 39, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.55, + "weight": 25.61 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_1", + "count": 29, + "mean_delta_ms": 0.924, + "std_delta_ms": 0.21, + "weight": 23.63 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_1.q", + "count": 29, + "mean_delta_ms": 0.931, + "std_delta_ms": 0.211, + "weight": 23.64 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_1.k", + "count": 29, + "mean_delta_ms": 0.936, + "std_delta_ms": 0.211, + "weight": 23.66 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_1.v", + "count": 29, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.212, + "weight": 23.68 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_1", + "count": 29, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.212, + "weight": 23.7 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_1.keys", + "count": 29, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.212, + "weight": 23.72 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_1.values", + "count": 29, + "mean_delta_ms": 0.958, + "std_delta_ms": 0.212, + "weight": 23.74 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_1.ffn_up", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.213, + "weight": 23.75 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_1.ffn_down", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.213, + "weight": 23.77 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 1.168, + "std_delta_ms": 0.229, + "weight": 24.25 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.23, + "weight": 24.25 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 1.18, + "std_delta_ms": 0.23, + "weight": 24.26 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.231, + "weight": 24.28 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.231, + "weight": 24.29 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.231, + "weight": 24.3 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.232, + "weight": 24.31 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.232, + "weight": 24.33 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_2.ffn_down", + "count": 28, + "mean_delta_ms": 1.195, + "std_delta_ms": 0.184, + "weight": 24.27 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_3", + "count": 27, + "mean_delta_ms": 1.343, + "std_delta_ms": 0.117, + "weight": 24.83 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_3.q", + "count": 27, + "mean_delta_ms": 1.35, + "std_delta_ms": 0.118, + "weight": 24.83 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_3.k", + "count": 27, + "mean_delta_ms": 1.356, + "std_delta_ms": 0.118, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_3.v", + "count": 27, + "mean_delta_ms": 1.363, + "std_delta_ms": 0.119, + "weight": 24.83 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_3", + "count": 27, + "mean_delta_ms": 1.369, + "std_delta_ms": 0.119, + "weight": 24.83 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_3.keys", + "count": 27, + "mean_delta_ms": 1.374, + "std_delta_ms": 0.12, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.kv_cache_3.values", + "count": 27, + "mean_delta_ms": 1.379, + "std_delta_ms": 0.12, + "weight": 24.84 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.layer_3.ffn_up", + "count": 27, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.12, + "weight": 24.85 + }, + { + "source": "server.layer_4", + "target": "server.layer_4.q", + "count": 57, + "mean_delta_ms": 0.667, + "std_delta_ms": 0.703, + "weight": 27.76 + }, + { + "source": "server.layer_4", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.673, + "std_delta_ms": 0.703, + "weight": 27.88 + }, + { + "source": "server.layer_4", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.703, + "weight": 27.99 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.685, + "std_delta_ms": 0.703, + "weight": 28.13 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.69, + "std_delta_ms": 0.703, + "weight": 28.25 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.696, + "std_delta_ms": 0.703, + "weight": 28.36 + }, + { + "source": "server.layer_4", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.701, + "std_delta_ms": 0.703, + "weight": 28.47 + }, + { + "source": "server.layer_4", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.706, + "std_delta_ms": 0.703, + "weight": 28.57 + }, + { + "source": "server.layer_4", + "target": "server.layer_5", + "count": 56, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.698, + "weight": 31.27 + }, + { + "source": "server.layer_4", + "target": "server.layer_5.q", + "count": 56, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.699, + "weight": 31.4 + }, + { + "source": "server.layer_4", + "target": "server.layer_5.k", + "count": 56, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.699, + "weight": 31.48 + }, + { + "source": "server.layer_4", + "target": "server.layer_5.v", + "count": 55, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.689, + "weight": 30.88 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_5", + "count": 55, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.689, + "weight": 30.97 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_5.keys", + "count": 54, + "mean_delta_ms": 0.873, + "std_delta_ms": 0.678, + "weight": 30.39 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_5.values", + "count": 54, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.678, + "weight": 30.47 + }, + { + "source": "server.layer_4", + "target": "server.layer_5.ffn_up", + "count": 54, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.678, + "weight": 30.54 + }, + { + "source": "server.layer_4", + "target": "server.layer_5.ffn_down", + "count": 54, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.678, + "weight": 30.62 + }, + { + "source": "server.layer_4", + "target": "server.buffer", + "count": 55, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.652, + "weight": 33.65 + }, + { + "source": "server.layer_4", + "target": "server.buffer.logits", + "count": 51, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.649, + "weight": 31.23 + }, + { + "source": "server.layer_4", + "target": "server.layer_0", + "count": 49, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.65, + "weight": 30.13 + }, + { + "source": "server.layer_4", + "target": "server.layer_0.q", + "count": 48, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.641, + "weight": 29.5 + }, + { + "source": "server.layer_4", + "target": "server.layer_0.k", + "count": 48, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.641, + "weight": 29.56 + }, + { + "source": "server.layer_4", + "target": "server.layer_0.v", + "count": 48, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.641, + "weight": 29.65 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_0", + "count": 47, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.631, + "weight": 29.05 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_0.keys", + "count": 47, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.631, + "weight": 29.11 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_0.values", + "count": 47, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.631, + "weight": 29.16 + }, + { + "source": "server.layer_4", + "target": "server.layer_0.ffn_up", + "count": 47, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.631, + "weight": 29.21 + }, + { + "source": "server.layer_4", + "target": "server.layer_0.ffn_down", + "count": 47, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.631, + "weight": 29.27 + }, + { + "source": "server.layer_4", + "target": "server.layer_1", + "count": 39, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.553, + "weight": 25.52 + }, + { + "source": "server.layer_4", + "target": "server.layer_1.q", + "count": 39, + "mean_delta_ms": 1.054, + "std_delta_ms": 0.553, + "weight": 25.58 + }, + { + "source": "server.layer_4", + "target": "server.layer_1.k", + "count": 37, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.521, + "weight": 24.39 + }, + { + "source": "server.layer_4", + "target": "server.layer_1.v", + "count": 35, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.478, + "weight": 23.34 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_1", + "count": 34, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.449, + "weight": 22.95 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_1.keys", + "count": 34, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.449, + "weight": 22.99 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_1.values", + "count": 34, + "mean_delta_ms": 0.943, + "std_delta_ms": 0.449, + "weight": 23.03 + }, + { + "source": "server.layer_4", + "target": "server.layer_1.ffn_up", + "count": 34, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.449, + "weight": 23.07 + }, + { + "source": "server.layer_4", + "target": "server.layer_1.ffn_down", + "count": 34, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.449, + "weight": 23.11 + }, + { + "source": "server.layer_4", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.207, + "weight": 23.94 + }, + { + "source": "server.layer_4", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.209, + "weight": 23.95 + }, + { + "source": "server.layer_4", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.209, + "weight": 23.96 + }, + { + "source": "server.layer_4", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.209, + "weight": 23.98 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.21, + "weight": 24.0 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.21, + "weight": 24.01 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.21, + "weight": 24.03 + }, + { + "source": "server.layer_4", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.21, + "weight": 24.05 + }, + { + "source": "server.layer_4", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.212, + "weight": 24.07 + }, + { + "source": "server.layer_4", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.223, + "weight": 24.5 + }, + { + "source": "server.layer_4", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.222, + "std_delta_ms": 0.224, + "weight": 24.51 + }, + { + "source": "server.layer_4", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.224, + "weight": 24.53 + }, + { + "source": "server.layer_4", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.235, + "std_delta_ms": 0.224, + "weight": 24.54 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.241, + "std_delta_ms": 0.224, + "weight": 24.56 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.246, + "std_delta_ms": 0.225, + "weight": 24.57 + }, + { + "source": "server.layer_4", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.251, + "std_delta_ms": 0.225, + "weight": 24.58 + }, + { + "source": "server.layer_4", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.256, + "std_delta_ms": 0.225, + "weight": 24.6 + }, + { + "source": "server.layer_4", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.261, + "std_delta_ms": 0.225, + "weight": 24.61 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_4.k", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.703, + "weight": 27.73 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.703, + "weight": 27.84 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.703, + "weight": 27.98 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.703, + "weight": 28.1 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.689, + "std_delta_ms": 0.703, + "weight": 28.22 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.694, + "std_delta_ms": 0.703, + "weight": 28.32 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.699, + "std_delta_ms": 0.703, + "weight": 28.43 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_5", + "count": 56, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.698, + "weight": 31.15 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_5.q", + "count": 56, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.699, + "weight": 31.29 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_5.k", + "count": 56, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.699, + "weight": 31.37 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_5.v", + "count": 56, + "mean_delta_ms": 0.895, + "std_delta_ms": 0.699, + "weight": 31.44 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_5", + "count": 55, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.689, + "weight": 30.86 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_5.keys", + "count": 55, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.689, + "weight": 30.95 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_5.values", + "count": 54, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.678, + "weight": 30.36 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_5.ffn_up", + "count": 54, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.678, + "weight": 30.43 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_5.ffn_down", + "count": 54, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.678, + "weight": 30.51 + }, + { + "source": "server.layer_4.q", + "target": "server.buffer", + "count": 55, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.652, + "weight": 33.56 + }, + { + "source": "server.layer_4.q", + "target": "server.buffer.logits", + "count": 51, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.649, + "weight": 31.15 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_0", + "count": 49, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.65, + "weight": 30.05 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_0.q", + "count": 49, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.65, + "weight": 30.11 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_0.k", + "count": 48, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.641, + "weight": 29.48 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_0.v", + "count": 48, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.64, + "weight": 29.58 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_0", + "count": 48, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.64, + "weight": 29.66 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_0.keys", + "count": 47, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.631, + "weight": 29.04 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_0.values", + "count": 47, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.631, + "weight": 29.09 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_0.ffn_up", + "count": 47, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.631, + "weight": 29.14 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_0.ffn_down", + "count": 47, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.631, + "weight": 29.19 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_1", + "count": 40, + "mean_delta_ms": 1.064, + "std_delta_ms": 0.566, + "weight": 26.11 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_1.q", + "count": 39, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.553, + "weight": 25.52 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_1.k", + "count": 39, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.553, + "weight": 25.57 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_1.v", + "count": 39, + "mean_delta_ms": 1.057, + "std_delta_ms": 0.553, + "weight": 25.61 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_1", + "count": 36, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.501, + "weight": 23.87 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_1.keys", + "count": 34, + "mean_delta_ms": 0.931, + "std_delta_ms": 0.449, + "weight": 22.93 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_1.values", + "count": 34, + "mean_delta_ms": 0.936, + "std_delta_ms": 0.449, + "weight": 22.97 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_1.ffn_up", + "count": 34, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.449, + "weight": 23.01 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_1.ffn_down", + "count": 34, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.449, + "weight": 23.05 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.207, + "weight": 23.92 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.208, + "weight": 23.93 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.208, + "weight": 23.95 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.208, + "weight": 23.97 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.209, + "weight": 23.98 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.209, + "weight": 24.0 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.209, + "weight": 24.01 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.21, + "weight": 24.03 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.212, + "weight": 24.05 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.222, + "weight": 24.49 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.223, + "weight": 24.51 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.221, + "std_delta_ms": 0.223, + "weight": 24.52 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.223, + "weight": 24.53 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.234, + "std_delta_ms": 0.224, + "weight": 24.55 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.239, + "std_delta_ms": 0.224, + "weight": 24.56 + }, + { + "source": "server.layer_4.q", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.244, + "std_delta_ms": 0.224, + "weight": 24.58 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.249, + "std_delta_ms": 0.224, + "weight": 24.59 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.254, + "std_delta_ms": 0.224, + "weight": 24.6 + }, + { + "source": "server.layer_4.q", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.387, + "std_delta_ms": 0.14, + "weight": 24.52 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_4.v", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.703, + "weight": 27.72 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.672, + "std_delta_ms": 0.703, + "weight": 27.86 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.703, + "weight": 27.98 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.703, + "weight": 28.09 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.688, + "std_delta_ms": 0.703, + "weight": 28.2 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.693, + "std_delta_ms": 0.703, + "weight": 28.31 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_5", + "count": 57, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.708, + "weight": 31.74 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_5.q", + "count": 56, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.699, + "weight": 31.19 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_5.k", + "count": 56, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.699, + "weight": 31.28 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_5.v", + "count": 56, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.699, + "weight": 31.35 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_5", + "count": 56, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.699, + "weight": 31.45 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_5.keys", + "count": 55, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.689, + "weight": 30.86 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_5.values", + "count": 55, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.689, + "weight": 30.94 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_5.ffn_up", + "count": 54, + "mean_delta_ms": 0.87, + "std_delta_ms": 0.679, + "weight": 30.34 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_5.ffn_down", + "count": 54, + "mean_delta_ms": 0.875, + "std_delta_ms": 0.679, + "weight": 30.42 + }, + { + "source": "server.layer_4.k", + "target": "server.buffer", + "count": 56, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.659, + "weight": 34.18 + }, + { + "source": "server.layer_4.k", + "target": "server.buffer.logits", + "count": 51, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.649, + "weight": 31.08 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_0", + "count": 49, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.65, + "weight": 29.99 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_0.q", + "count": 49, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.65, + "weight": 30.05 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_0.k", + "count": 49, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.65, + "weight": 30.1 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_0.v", + "count": 48, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.64, + "weight": 29.51 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_0", + "count": 48, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.64, + "weight": 29.59 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_0.keys", + "count": 48, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.64, + "weight": 29.66 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_0.values", + "count": 47, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.631, + "weight": 29.03 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_0.ffn_up", + "count": 47, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.631, + "weight": 29.08 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_0.ffn_down", + "count": 47, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.631, + "weight": 29.13 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_1", + "count": 40, + "mean_delta_ms": 1.058, + "std_delta_ms": 0.566, + "weight": 26.06 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_1.q", + "count": 39, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.553, + "weight": 25.47 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_1.k", + "count": 39, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.553, + "weight": 25.52 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_1.v", + "count": 39, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.553, + "weight": 25.56 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_1", + "count": 38, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.538, + "weight": 24.99 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_1.keys", + "count": 36, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.501, + "weight": 23.87 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_1.values", + "count": 34, + "mean_delta_ms": 0.93, + "std_delta_ms": 0.449, + "weight": 22.93 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_1.ffn_up", + "count": 34, + "mean_delta_ms": 0.935, + "std_delta_ms": 0.449, + "weight": 22.97 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_1.ffn_down", + "count": 34, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.449, + "weight": 23.01 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.205, + "weight": 23.94 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.206, + "weight": 23.95 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.206, + "weight": 23.96 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.206, + "weight": 23.98 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.207, + "weight": 24.0 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.207, + "weight": 24.01 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.207, + "weight": 24.03 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.207, + "weight": 24.05 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.21, + "weight": 24.07 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.202, + "std_delta_ms": 0.22, + "weight": 24.51 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.221, + "weight": 24.52 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.221, + "weight": 24.54 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.222, + "std_delta_ms": 0.221, + "weight": 24.55 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.222, + "weight": 24.57 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.233, + "std_delta_ms": 0.222, + "weight": 24.58 + }, + { + "source": "server.layer_4.k", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.238, + "std_delta_ms": 0.222, + "weight": 24.59 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.243, + "std_delta_ms": 0.222, + "weight": 24.61 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.248, + "std_delta_ms": 0.222, + "weight": 24.62 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.14, + "weight": 24.51 + }, + { + "source": "server.layer_4.k", + "target": "server.layer_4.q", + "count": 27, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.142, + "weight": 24.5 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_4", + "count": 57, + "mean_delta_ms": 0.667, + "std_delta_ms": 0.703, + "weight": 27.75 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.673, + "std_delta_ms": 0.703, + "weight": 27.87 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.703, + "weight": 27.99 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.703, + "weight": 28.1 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.688, + "std_delta_ms": 0.703, + "weight": 28.2 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_5", + "count": 57, + "mean_delta_ms": 0.885, + "std_delta_ms": 0.708, + "weight": 31.66 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_5.q", + "count": 56, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.699, + "weight": 31.11 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_5.k", + "count": 56, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.699, + "weight": 31.2 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_5.v", + "count": 56, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.699, + "weight": 31.27 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_5", + "count": 56, + "mean_delta_ms": 0.89, + "std_delta_ms": 0.699, + "weight": 31.37 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_5.keys", + "count": 56, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.699, + "weight": 31.46 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_5.values", + "count": 55, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.689, + "weight": 30.86 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_5.ffn_up", + "count": 55, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.689, + "weight": 30.93 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_5.ffn_down", + "count": 54, + "mean_delta_ms": 0.87, + "std_delta_ms": 0.679, + "weight": 30.34 + }, + { + "source": "server.layer_4.v", + "target": "server.buffer", + "count": 56, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.659, + "weight": 34.11 + }, + { + "source": "server.layer_4.v", + "target": "server.buffer.logits", + "count": 51, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.649, + "weight": 31.02 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_0", + "count": 49, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.65, + "weight": 29.93 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_0.q", + "count": 49, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.65, + "weight": 29.99 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_0.k", + "count": 49, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.65, + "weight": 30.05 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_0.v", + "count": 49, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.649, + "weight": 30.15 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_0", + "count": 48, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.64, + "weight": 29.54 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_0.keys", + "count": 48, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.64, + "weight": 29.6 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_0.values", + "count": 48, + "mean_delta_ms": 1.035, + "std_delta_ms": 0.64, + "weight": 29.66 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_0.ffn_up", + "count": 47, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.631, + "weight": 29.02 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_0.ffn_down", + "count": 47, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.631, + "weight": 29.08 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_1", + "count": 41, + "mean_delta_ms": 1.076, + "std_delta_ms": 0.578, + "weight": 26.68 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_1.q", + "count": 40, + "mean_delta_ms": 1.06, + "std_delta_ms": 0.566, + "weight": 26.07 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_1.k", + "count": 39, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.553, + "weight": 25.48 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_1.v", + "count": 39, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.553, + "weight": 25.52 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_1", + "count": 39, + "mean_delta_ms": 1.053, + "std_delta_ms": 0.553, + "weight": 25.58 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_1.keys", + "count": 37, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.521, + "weight": 24.39 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_1.values", + "count": 36, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.501, + "weight": 23.87 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_1.ffn_up", + "count": 35, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.477, + "weight": 23.39 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_1.ffn_down", + "count": 34, + "mean_delta_ms": 0.935, + "std_delta_ms": 0.449, + "weight": 22.97 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.204, + "weight": 23.92 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.205, + "weight": 23.93 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.206, + "weight": 23.95 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.206, + "weight": 23.97 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.987, + "std_delta_ms": 0.206, + "weight": 23.98 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.207, + "weight": 24.0 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.207, + "weight": 24.01 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.207, + "weight": 24.03 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.209, + "weight": 24.05 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.22, + "weight": 24.5 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.22, + "weight": 24.51 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.22, + "weight": 24.53 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.217, + "std_delta_ms": 0.221, + "weight": 24.54 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.223, + "std_delta_ms": 0.221, + "weight": 24.56 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.228, + "std_delta_ms": 0.221, + "weight": 24.57 + }, + { + "source": "server.layer_4.v", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.233, + "std_delta_ms": 0.222, + "weight": 24.58 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.238, + "std_delta_ms": 0.222, + "weight": 24.6 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.243, + "std_delta_ms": 0.222, + "weight": 24.61 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.377, + "std_delta_ms": 0.14, + "weight": 24.51 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_4.q", + "count": 27, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.141, + "weight": 24.5 + }, + { + "source": "server.layer_4.v", + "target": "server.layer_4.k", + "count": 27, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.143, + "weight": 24.47 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_4.keys", + "count": 57, + "mean_delta_ms": 0.666, + "std_delta_ms": 0.703, + "weight": 27.73 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.703, + "weight": 27.84 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.676, + "std_delta_ms": 0.703, + "weight": 27.95 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.681, + "std_delta_ms": 0.703, + "weight": 28.06 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_5", + "count": 57, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.708, + "weight": 31.55 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_5.q", + "count": 57, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.708, + "weight": 31.69 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_5.k", + "count": 56, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.699, + "weight": 31.09 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_5.v", + "count": 56, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.699, + "weight": 31.17 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_5", + "count": 56, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.699, + "weight": 31.27 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_5.keys", + "count": 56, + "mean_delta_ms": 0.889, + "std_delta_ms": 0.699, + "weight": 31.35 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_5.values", + "count": 56, + "mean_delta_ms": 0.894, + "std_delta_ms": 0.699, + "weight": 31.43 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_5.ffn_up", + "count": 55, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.689, + "weight": 30.83 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_5.ffn_down", + "count": 55, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.689, + "weight": 30.91 + }, + { + "source": "server.kv_cache_4", + "target": "server.buffer", + "count": 56, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.659, + "weight": 34.03 + }, + { + "source": "server.kv_cache_4", + "target": "server.buffer.logits", + "count": 52, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.657, + "weight": 31.63 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_0", + "count": 49, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.649, + "weight": 29.85 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_0.q", + "count": 49, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.649, + "weight": 29.92 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_0.k", + "count": 49, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.649, + "weight": 29.97 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_0.v", + "count": 49, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.648, + "weight": 30.07 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_0", + "count": 49, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.649, + "weight": 30.15 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_0.keys", + "count": 48, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.64, + "weight": 29.52 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_0.values", + "count": 48, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.64, + "weight": 29.58 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_0.ffn_up", + "count": 48, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.64, + "weight": 29.63 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_0.ffn_down", + "count": 48, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.64, + "weight": 29.68 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_1", + "count": 42, + "mean_delta_ms": 1.092, + "std_delta_ms": 0.589, + "weight": 27.29 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_1.q", + "count": 41, + "mean_delta_ms": 1.076, + "std_delta_ms": 0.578, + "weight": 26.67 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_1.k", + "count": 40, + "mean_delta_ms": 1.059, + "std_delta_ms": 0.567, + "weight": 26.06 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_1.v", + "count": 39, + "mean_delta_ms": 1.04, + "std_delta_ms": 0.553, + "weight": 25.46 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_1", + "count": 39, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.553, + "weight": 25.52 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_1.keys", + "count": 39, + "mean_delta_ms": 1.052, + "std_delta_ms": 0.553, + "weight": 25.56 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_1.values", + "count": 39, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.553, + "weight": 25.6 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_1.ffn_up", + "count": 36, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.501, + "weight": 23.85 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_1.ffn_down", + "count": 36, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.501, + "weight": 23.89 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.204, + "weight": 23.89 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.206, + "weight": 23.9 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.206, + "weight": 23.91 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.206, + "weight": 23.93 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.207, + "weight": 23.95 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.207, + "weight": 23.97 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.207, + "weight": 23.98 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.207, + "weight": 24.0 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.21, + "weight": 24.02 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.22, + "weight": 24.47 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.221, + "weight": 24.49 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.221, + "weight": 24.51 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.221, + "weight": 24.52 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.216, + "std_delta_ms": 0.221, + "weight": 24.53 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.221, + "std_delta_ms": 0.222, + "weight": 24.54 + }, + { + "source": "server.kv_cache_4", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.226, + "std_delta_ms": 0.222, + "weight": 24.56 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.222, + "weight": 24.57 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.236, + "std_delta_ms": 0.222, + "weight": 24.59 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.37, + "std_delta_ms": 0.139, + "weight": 24.5 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_4.q", + "count": 27, + "mean_delta_ms": 1.377, + "std_delta_ms": 0.141, + "weight": 24.49 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_4.k", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.143, + "weight": 24.47 + }, + { + "source": "server.kv_cache_4", + "target": "server.layer_4.v", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.143, + "weight": 24.47 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_4.values", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.703, + "weight": 27.72 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.671, + "std_delta_ms": 0.703, + "weight": 27.83 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.676, + "std_delta_ms": 0.703, + "weight": 27.94 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_5", + "count": 57, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.708, + "weight": 31.46 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_5.q", + "count": 57, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.708, + "weight": 31.6 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_5.k", + "count": 57, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.708, + "weight": 31.68 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_5.v", + "count": 56, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.699, + "weight": 31.08 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_5", + "count": 56, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.699, + "weight": 31.18 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_5.keys", + "count": 56, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.699, + "weight": 31.26 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_5.values", + "count": 56, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.699, + "weight": 31.34 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_5.ffn_up", + "count": 56, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.699, + "weight": 31.42 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_5.ffn_down", + "count": 55, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.689, + "weight": 30.82 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.buffer", + "count": 56, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.659, + "weight": 33.95 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.buffer.logits", + "count": 52, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.656, + "weight": 31.56 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_0", + "count": 50, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.658, + "weight": 30.48 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_0.q", + "count": 49, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.649, + "weight": 29.85 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_0.k", + "count": 49, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.649, + "weight": 29.91 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_0.v", + "count": 49, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.648, + "weight": 30.01 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_0", + "count": 49, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.649, + "weight": 30.08 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_0.keys", + "count": 49, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.649, + "weight": 30.15 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_0.values", + "count": 49, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.649, + "weight": 30.2 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_0.ffn_up", + "count": 48, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.64, + "weight": 29.57 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_0.ffn_down", + "count": 48, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.64, + "weight": 29.62 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_1", + "count": 43, + "mean_delta_ms": 1.107, + "std_delta_ms": 0.598, + "weight": 27.93 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_1.q", + "count": 41, + "mean_delta_ms": 1.071, + "std_delta_ms": 0.578, + "weight": 26.62 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_1.k", + "count": 41, + "mean_delta_ms": 1.076, + "std_delta_ms": 0.578, + "weight": 26.67 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_1.v", + "count": 41, + "mean_delta_ms": 1.081, + "std_delta_ms": 0.578, + "weight": 26.71 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_1", + "count": 39, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.553, + "weight": 25.47 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_1.keys", + "count": 39, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.553, + "weight": 25.51 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_1.values", + "count": 39, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.553, + "weight": 25.56 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_1.ffn_up", + "count": 39, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.553, + "weight": 25.6 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_1.ffn_down", + "count": 36, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.501, + "weight": 23.84 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.95, + "std_delta_ms": 0.204, + "weight": 23.87 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.205, + "weight": 23.88 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.962, + "std_delta_ms": 0.206, + "weight": 23.89 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.206, + "weight": 23.91 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.206, + "weight": 23.93 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.207, + "weight": 23.95 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.207, + "weight": 23.96 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 0.99, + "std_delta_ms": 0.207, + "weight": 23.98 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.209, + "weight": 24.0 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.184, + "std_delta_ms": 0.22, + "weight": 24.46 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.22, + "weight": 24.47 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.22, + "weight": 24.49 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.221, + "weight": 24.5 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.221, + "weight": 24.52 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.221, + "weight": 24.53 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.221, + "std_delta_ms": 0.222, + "weight": 24.55 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.226, + "std_delta_ms": 0.222, + "weight": 24.56 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.222, + "weight": 24.57 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.364, + "std_delta_ms": 0.139, + "weight": 24.5 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_4.q", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.141, + "weight": 24.49 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_4.k", + "count": 27, + "mean_delta_ms": 1.377, + "std_delta_ms": 0.143, + "weight": 24.46 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.layer_4.v", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.143, + "weight": 24.46 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.kv_cache_4", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.144, + "weight": 24.47 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_4.ffn_up", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.703, + "weight": 27.72 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.67, + "std_delta_ms": 0.703, + "weight": 27.83 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_5", + "count": 57, + "mean_delta_ms": 0.867, + "std_delta_ms": 0.708, + "weight": 31.38 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_5.q", + "count": 57, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.708, + "weight": 31.51 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_5.k", + "count": 57, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.708, + "weight": 31.6 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_5.v", + "count": 57, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.708, + "weight": 31.68 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_5", + "count": 56, + "mean_delta_ms": 0.872, + "std_delta_ms": 0.699, + "weight": 31.1 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_5.keys", + "count": 56, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.699, + "weight": 31.18 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_5.values", + "count": 56, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.699, + "weight": 31.26 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_5.ffn_up", + "count": 56, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.699, + "weight": 31.34 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_5.ffn_down", + "count": 56, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.699, + "weight": 31.42 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.buffer", + "count": 57, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.666, + "weight": 34.57 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.buffer.logits", + "count": 52, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.657, + "weight": 31.49 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_0", + "count": 50, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.658, + "weight": 30.41 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_0.q", + "count": 50, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.658, + "weight": 30.48 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_0.k", + "count": 49, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.65, + "weight": 29.84 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_0.v", + "count": 49, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.649, + "weight": 29.95 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_0", + "count": 49, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.649, + "weight": 30.02 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_0.keys", + "count": 49, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.649, + "weight": 30.09 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_0.values", + "count": 49, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.649, + "weight": 30.14 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_0.ffn_up", + "count": 49, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.649, + "weight": 30.19 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_0.ffn_down", + "count": 48, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.64, + "weight": 29.56 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_1", + "count": 43, + "mean_delta_ms": 1.102, + "std_delta_ms": 0.598, + "weight": 27.88 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_1.q", + "count": 43, + "mean_delta_ms": 1.109, + "std_delta_ms": 0.598, + "weight": 27.94 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_1.k", + "count": 41, + "mean_delta_ms": 1.071, + "std_delta_ms": 0.578, + "weight": 26.62 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_1.v", + "count": 41, + "mean_delta_ms": 1.076, + "std_delta_ms": 0.578, + "weight": 26.67 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_1", + "count": 40, + "mean_delta_ms": 1.059, + "std_delta_ms": 0.566, + "weight": 26.07 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_1.keys", + "count": 39, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.553, + "weight": 25.47 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_1.values", + "count": 39, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.553, + "weight": 25.51 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_1.ffn_up", + "count": 39, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.553, + "weight": 25.55 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_1.ffn_down", + "count": 39, + "mean_delta_ms": 1.055, + "std_delta_ms": 0.553, + "weight": 25.59 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.944, + "std_delta_ms": 0.204, + "weight": 23.86 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.205, + "weight": 23.86 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.205, + "weight": 23.88 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.205, + "weight": 23.9 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.206, + "weight": 23.92 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.206, + "weight": 23.93 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.207, + "weight": 23.95 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 0.984, + "std_delta_ms": 0.207, + "weight": 23.97 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.209, + "weight": 23.99 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.178, + "std_delta_ms": 0.219, + "weight": 24.45 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.22, + "weight": 24.46 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.22, + "weight": 24.48 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.221, + "weight": 24.49 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.205, + "std_delta_ms": 0.221, + "weight": 24.51 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.221, + "weight": 24.52 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.221, + "weight": 24.54 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.221, + "weight": 24.55 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.225, + "std_delta_ms": 0.221, + "weight": 24.56 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.359, + "std_delta_ms": 0.138, + "weight": 24.52 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_4.q", + "count": 27, + "mean_delta_ms": 1.366, + "std_delta_ms": 0.139, + "weight": 24.5 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_4.k", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.141, + "weight": 24.48 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.layer_4.v", + "count": 27, + "mean_delta_ms": 1.376, + "std_delta_ms": 0.142, + "weight": 24.48 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_4", + "count": 27, + "mean_delta_ms": 1.382, + "std_delta_ms": 0.142, + "weight": 24.49 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.kv_cache_4.keys", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.142, + "weight": 24.49 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_4.ffn_down", + "count": 57, + "mean_delta_ms": 0.665, + "std_delta_ms": 0.703, + "weight": 27.72 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_5", + "count": 57, + "mean_delta_ms": 0.861, + "std_delta_ms": 0.708, + "weight": 31.29 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_5.q", + "count": 57, + "mean_delta_ms": 0.87, + "std_delta_ms": 0.708, + "weight": 31.43 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_5.k", + "count": 57, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.708, + "weight": 31.52 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_5.v", + "count": 57, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.708, + "weight": 31.59 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_5", + "count": 57, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.708, + "weight": 31.69 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_5.keys", + "count": 56, + "mean_delta_ms": 0.873, + "std_delta_ms": 0.699, + "weight": 31.1 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_5.values", + "count": 56, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.699, + "weight": 31.18 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_5.ffn_up", + "count": 56, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.699, + "weight": 31.26 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_5.ffn_down", + "count": 56, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.699, + "weight": 31.34 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.buffer", + "count": 57, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.666, + "weight": 34.5 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.buffer.logits", + "count": 53, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.664, + "weight": 32.12 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_0", + "count": 50, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.658, + "weight": 30.35 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_0.q", + "count": 50, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.658, + "weight": 30.42 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_0.k", + "count": 50, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.658, + "weight": 30.47 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_0.v", + "count": 49, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.649, + "weight": 29.89 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_0", + "count": 49, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.649, + "weight": 29.96 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_0.keys", + "count": 49, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.649, + "weight": 30.03 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_0.values", + "count": 49, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.649, + "weight": 30.08 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_0.ffn_up", + "count": 49, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.649, + "weight": 30.14 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_0.ffn_down", + "count": 49, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.649, + "weight": 30.19 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_1", + "count": 43, + "mean_delta_ms": 1.097, + "std_delta_ms": 0.598, + "weight": 27.83 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_1.q", + "count": 43, + "mean_delta_ms": 1.103, + "std_delta_ms": 0.598, + "weight": 27.89 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_1.k", + "count": 42, + "mean_delta_ms": 1.087, + "std_delta_ms": 0.589, + "weight": 27.25 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_1.v", + "count": 41, + "mean_delta_ms": 1.07, + "std_delta_ms": 0.578, + "weight": 26.62 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_1", + "count": 41, + "mean_delta_ms": 1.077, + "std_delta_ms": 0.578, + "weight": 26.68 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_1.keys", + "count": 40, + "mean_delta_ms": 1.059, + "std_delta_ms": 0.566, + "weight": 26.07 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_1.values", + "count": 40, + "mean_delta_ms": 1.064, + "std_delta_ms": 0.566, + "weight": 26.11 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_1.ffn_up", + "count": 39, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.553, + "weight": 25.51 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_1.ffn_down", + "count": 39, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.553, + "weight": 25.55 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.939, + "std_delta_ms": 0.204, + "weight": 23.83 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.205, + "weight": 23.84 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.205, + "weight": 23.85 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.205, + "weight": 23.88 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.206, + "weight": 23.89 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.206, + "weight": 23.91 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.207, + "weight": 23.93 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.207, + "weight": 23.95 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 0.995, + "std_delta_ms": 0.209, + "weight": 23.97 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.173, + "std_delta_ms": 0.219, + "weight": 24.43 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.22, + "weight": 24.44 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.187, + "std_delta_ms": 0.22, + "weight": 24.46 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.221, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.2, + "std_delta_ms": 0.221, + "weight": 24.49 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.205, + "std_delta_ms": 0.221, + "weight": 24.5 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.221, + "weight": 24.52 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.221, + "weight": 24.53 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.221, + "weight": 24.55 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.353, + "std_delta_ms": 0.138, + "weight": 24.5 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_4.q", + "count": 27, + "mean_delta_ms": 1.36, + "std_delta_ms": 0.139, + "weight": 24.49 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_4.k", + "count": 27, + "mean_delta_ms": 1.366, + "std_delta_ms": 0.141, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.layer_4.v", + "count": 27, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.142, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_4", + "count": 27, + "mean_delta_ms": 1.377, + "std_delta_ms": 0.142, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_4.keys", + "count": 27, + "mean_delta_ms": 1.383, + "std_delta_ms": 0.143, + "weight": 24.48 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.kv_cache_4.values", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.143, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_5", + "count": 57, + "mean_delta_ms": 0.856, + "std_delta_ms": 0.708, + "weight": 31.21 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_5.q", + "count": 57, + "mean_delta_ms": 0.865, + "std_delta_ms": 0.708, + "weight": 31.35 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_5.k", + "count": 57, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.708, + "weight": 31.43 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_5.v", + "count": 57, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.708, + "weight": 31.51 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_5", + "count": 57, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.708, + "weight": 31.61 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_5.keys", + "count": 57, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.708, + "weight": 31.7 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_5.values", + "count": 56, + "mean_delta_ms": 0.873, + "std_delta_ms": 0.699, + "weight": 31.1 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_5.ffn_up", + "count": 56, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.699, + "weight": 31.18 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_5.ffn_down", + "count": 56, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.699, + "weight": 31.26 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.buffer", + "count": 57, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.666, + "weight": 34.43 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.buffer.logits", + "count": 53, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.664, + "weight": 32.06 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_0", + "count": 50, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.658, + "weight": 30.29 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_0.q", + "count": 50, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.658, + "weight": 30.36 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_0.k", + "count": 50, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.658, + "weight": 30.41 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_0.v", + "count": 50, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.657, + "weight": 30.52 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_0", + "count": 49, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.649, + "weight": 29.9 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_0.keys", + "count": 49, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.649, + "weight": 29.97 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_0.values", + "count": 49, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.649, + "weight": 30.03 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_0.ffn_up", + "count": 49, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.649, + "weight": 30.08 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_0.ffn_down", + "count": 49, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.649, + "weight": 30.14 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_1", + "count": 44, + "mean_delta_ms": 1.112, + "std_delta_ms": 0.606, + "weight": 28.48 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_1.q", + "count": 43, + "mean_delta_ms": 1.098, + "std_delta_ms": 0.598, + "weight": 27.85 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_1.k", + "count": 43, + "mean_delta_ms": 1.104, + "std_delta_ms": 0.598, + "weight": 27.89 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_1.v", + "count": 42, + "mean_delta_ms": 1.087, + "std_delta_ms": 0.589, + "weight": 27.25 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_1", + "count": 41, + "mean_delta_ms": 1.072, + "std_delta_ms": 0.578, + "weight": 26.64 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_1.keys", + "count": 41, + "mean_delta_ms": 1.077, + "std_delta_ms": 0.578, + "weight": 26.69 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_1.values", + "count": 40, + "mean_delta_ms": 1.059, + "std_delta_ms": 0.566, + "weight": 26.07 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_1.ffn_up", + "count": 40, + "mean_delta_ms": 1.064, + "std_delta_ms": 0.566, + "weight": 26.11 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_1.ffn_down", + "count": 39, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.553, + "weight": 25.51 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_2", + "count": 29, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.204, + "weight": 23.81 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_2.q", + "count": 29, + "mean_delta_ms": 0.941, + "std_delta_ms": 0.205, + "weight": 23.82 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_2.k", + "count": 29, + "mean_delta_ms": 0.947, + "std_delta_ms": 0.205, + "weight": 23.84 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_2.v", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.205, + "weight": 23.86 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_2", + "count": 29, + "mean_delta_ms": 0.958, + "std_delta_ms": 0.206, + "weight": 23.88 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_2.keys", + "count": 29, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.206, + "weight": 23.89 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_2.values", + "count": 29, + "mean_delta_ms": 0.969, + "std_delta_ms": 0.206, + "weight": 23.91 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_2.ffn_up", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.207, + "weight": 23.93 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_2.ffn_down", + "count": 29, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.209, + "weight": 23.95 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 1.168, + "std_delta_ms": 0.219, + "weight": 24.42 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.22, + "weight": 24.43 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 1.182, + "std_delta_ms": 0.22, + "weight": 24.45 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.22, + "weight": 24.46 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 1.194, + "std_delta_ms": 0.221, + "weight": 24.48 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.2, + "std_delta_ms": 0.221, + "weight": 24.49 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.205, + "std_delta_ms": 0.221, + "weight": 24.51 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.221, + "weight": 24.52 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.221, + "weight": 24.53 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_4", + "count": 27, + "mean_delta_ms": 1.348, + "std_delta_ms": 0.138, + "weight": 24.5 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_4.q", + "count": 27, + "mean_delta_ms": 1.355, + "std_delta_ms": 0.139, + "weight": 24.49 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_4.k", + "count": 27, + "mean_delta_ms": 1.361, + "std_delta_ms": 0.141, + "weight": 24.46 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_4.v", + "count": 27, + "mean_delta_ms": 1.366, + "std_delta_ms": 0.142, + "weight": 24.46 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_4", + "count": 27, + "mean_delta_ms": 1.372, + "std_delta_ms": 0.142, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_4.keys", + "count": 27, + "mean_delta_ms": 1.378, + "std_delta_ms": 0.142, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.kv_cache_4.values", + "count": 27, + "mean_delta_ms": 1.383, + "std_delta_ms": 0.143, + "weight": 24.47 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.layer_4.ffn_up", + "count": 27, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.143, + "weight": 24.48 + }, + { + "source": "server.layer_5", + "target": "server.layer_5.q", + "count": 58, + "mean_delta_ms": 0.695, + "std_delta_ms": 0.72, + "weight": 28.49 + }, + { + "source": "server.layer_5", + "target": "server.layer_5.k", + "count": 58, + "mean_delta_ms": 0.701, + "std_delta_ms": 0.72, + "weight": 28.6 + }, + { + "source": "server.layer_5", + "target": "server.layer_5.v", + "count": 58, + "mean_delta_ms": 0.705, + "std_delta_ms": 0.72, + "weight": 28.7 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_5", + "count": 58, + "mean_delta_ms": 0.712, + "std_delta_ms": 0.72, + "weight": 28.83 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_5.keys", + "count": 58, + "mean_delta_ms": 0.717, + "std_delta_ms": 0.72, + "weight": 28.94 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_5.values", + "count": 58, + "mean_delta_ms": 0.722, + "std_delta_ms": 0.72, + "weight": 29.05 + }, + { + "source": "server.layer_5", + "target": "server.layer_5.ffn_up", + "count": 58, + "mean_delta_ms": 0.727, + "std_delta_ms": 0.72, + "weight": 29.14 + }, + { + "source": "server.layer_5", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.732, + "std_delta_ms": 0.72, + "weight": 29.24 + }, + { + "source": "server.layer_5", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.694, + "weight": 33.6 + }, + { + "source": "server.layer_5", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.693, + "weight": 31.45 + }, + { + "source": "server.layer_5", + "target": "server.layer_0", + "count": 53, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.684, + "weight": 29.83 + }, + { + "source": "server.layer_5", + "target": "server.layer_0.q", + "count": 53, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.684, + "weight": 29.92 + }, + { + "source": "server.layer_5", + "target": "server.layer_0.k", + "count": 53, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.684, + "weight": 29.99 + }, + { + "source": "server.layer_5", + "target": "server.layer_0.v", + "count": 52, + "mean_delta_ms": 0.879, + "std_delta_ms": 0.674, + "weight": 29.43 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_0", + "count": 52, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.674, + "weight": 29.53 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_0.keys", + "count": 52, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.674, + "weight": 29.62 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_0.values", + "count": 52, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.674, + "weight": 29.68 + }, + { + "source": "server.layer_5", + "target": "server.layer_0.ffn_up", + "count": 52, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.674, + "weight": 29.75 + }, + { + "source": "server.layer_5", + "target": "server.layer_0.ffn_down", + "count": 52, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.674, + "weight": 29.82 + }, + { + "source": "server.layer_5", + "target": "server.layer_1", + "count": 48, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.64, + "weight": 29.32 + }, + { + "source": "server.layer_5", + "target": "server.layer_1.q", + "count": 48, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.64, + "weight": 29.39 + }, + { + "source": "server.layer_5", + "target": "server.layer_1.k", + "count": 48, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.64, + "weight": 29.45 + }, + { + "source": "server.layer_5", + "target": "server.layer_1.v", + "count": 47, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.631, + "weight": 28.83 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_1", + "count": 47, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.63, + "weight": 28.91 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_1.keys", + "count": 47, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.63, + "weight": 28.97 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_1.values", + "count": 47, + "mean_delta_ms": 1.017, + "std_delta_ms": 0.63, + "weight": 29.02 + }, + { + "source": "server.layer_5", + "target": "server.layer_1.ffn_up", + "count": 47, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.63, + "weight": 29.08 + }, + { + "source": "server.layer_5", + "target": "server.layer_1.ffn_down", + "count": 47, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.63, + "weight": 29.13 + }, + { + "source": "server.layer_5", + "target": "server.layer_2", + "count": 39, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.542, + "weight": 25.68 + }, + { + "source": "server.layer_5", + "target": "server.layer_2.q", + "count": 39, + "mean_delta_ms": 1.053, + "std_delta_ms": 0.542, + "weight": 25.74 + }, + { + "source": "server.layer_5", + "target": "server.layer_2.k", + "count": 39, + "mean_delta_ms": 1.058, + "std_delta_ms": 0.542, + "weight": 25.79 + }, + { + "source": "server.layer_5", + "target": "server.layer_2.v", + "count": 37, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.509, + "weight": 24.62 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_2", + "count": 37, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.509, + "weight": 24.68 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_2.keys", + "count": 37, + "mean_delta_ms": 1.024, + "std_delta_ms": 0.509, + "weight": 24.72 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_2.values", + "count": 35, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.465, + "weight": 23.69 + }, + { + "source": "server.layer_5", + "target": "server.layer_2.ffn_up", + "count": 35, + "mean_delta_ms": 0.978, + "std_delta_ms": 0.465, + "weight": 23.73 + }, + { + "source": "server.layer_5", + "target": "server.layer_2.ffn_down", + "count": 35, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.462, + "weight": 23.88 + }, + { + "source": "server.layer_5", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.176, + "weight": 24.55 + }, + { + "source": "server.layer_5", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.979, + "std_delta_ms": 0.177, + "weight": 24.56 + }, + { + "source": "server.layer_5", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.177, + "weight": 24.59 + }, + { + "source": "server.layer_5", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.177, + "weight": 24.6 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.178, + "weight": 24.62 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.178, + "weight": 24.63 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.178, + "weight": 24.65 + }, + { + "source": "server.layer_5", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.178, + "weight": 24.67 + }, + { + "source": "server.layer_5", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.178, + "weight": 24.68 + }, + { + "source": "server.layer_5", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.205, + "std_delta_ms": 0.195, + "weight": 24.96 + }, + { + "source": "server.layer_5", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.212, + "std_delta_ms": 0.196, + "weight": 24.97 + }, + { + "source": "server.layer_5", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.218, + "std_delta_ms": 0.197, + "weight": 24.96 + }, + { + "source": "server.layer_5", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.223, + "std_delta_ms": 0.197, + "weight": 24.97 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.229, + "std_delta_ms": 0.199, + "weight": 24.96 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.235, + "std_delta_ms": 0.199, + "weight": 24.97 + }, + { + "source": "server.layer_5", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.24, + "std_delta_ms": 0.2, + "weight": 24.97 + }, + { + "source": "server.layer_5", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.246, + "std_delta_ms": 0.2, + "weight": 24.99 + }, + { + "source": "server.layer_5", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.251, + "std_delta_ms": 0.2, + "weight": 25.0 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_5.k", + "count": 58, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.72, + "weight": 28.42 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_5.v", + "count": 58, + "mean_delta_ms": 0.696, + "std_delta_ms": 0.72, + "weight": 28.52 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_5", + "count": 58, + "mean_delta_ms": 0.703, + "std_delta_ms": 0.72, + "weight": 28.65 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_5.keys", + "count": 58, + "mean_delta_ms": 0.708, + "std_delta_ms": 0.72, + "weight": 28.76 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_5.values", + "count": 58, + "mean_delta_ms": 0.713, + "std_delta_ms": 0.72, + "weight": 28.87 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_5.ffn_up", + "count": 58, + "mean_delta_ms": 0.718, + "std_delta_ms": 0.72, + "weight": 28.97 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.723, + "std_delta_ms": 0.72, + "weight": 29.07 + }, + { + "source": "server.layer_5.q", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.694, + "weight": 33.44 + }, + { + "source": "server.layer_5.q", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.878, + "std_delta_ms": 0.692, + "weight": 31.32 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.694, + "weight": 30.38 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.899, + "std_delta_ms": 0.694, + "weight": 30.46 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.903, + "std_delta_ms": 0.694, + "weight": 30.53 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_0.v", + "count": 53, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.685, + "weight": 29.97 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_0", + "count": 52, + "mean_delta_ms": 0.877, + "std_delta_ms": 0.674, + "weight": 29.4 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_0.keys", + "count": 52, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.674, + "weight": 29.49 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_0.values", + "count": 52, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.674, + "weight": 29.56 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_0.ffn_up", + "count": 52, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.674, + "weight": 29.63 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_0.ffn_down", + "count": 52, + "mean_delta_ms": 0.897, + "std_delta_ms": 0.674, + "weight": 29.7 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_1", + "count": 49, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.649, + "weight": 29.9 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_1.q", + "count": 48, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.64, + "weight": 29.29 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_1.k", + "count": 48, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.64, + "weight": 29.35 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_1.v", + "count": 48, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.64, + "weight": 29.41 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_1", + "count": 47, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.63, + "weight": 28.81 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_1.keys", + "count": 47, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.63, + "weight": 28.87 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_1.values", + "count": 47, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.63, + "weight": 28.93 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_1.ffn_up", + "count": 47, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.63, + "weight": 28.98 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_1.ffn_down", + "count": 47, + "mean_delta_ms": 1.018, + "std_delta_ms": 0.63, + "weight": 29.04 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_2", + "count": 39, + "mean_delta_ms": 1.037, + "std_delta_ms": 0.542, + "weight": 25.61 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_2.q", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.542, + "weight": 25.67 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_2.k", + "count": 39, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.542, + "weight": 25.72 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_2.v", + "count": 39, + "mean_delta_ms": 1.054, + "std_delta_ms": 0.542, + "weight": 25.77 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_2", + "count": 38, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.526, + "weight": 25.2 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_2.keys", + "count": 37, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.508, + "weight": 24.66 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_2.values", + "count": 37, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.508, + "weight": 24.7 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_2.ffn_up", + "count": 36, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.488, + "weight": 24.18 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_2.ffn_down", + "count": 36, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.485, + "weight": 24.33 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.962, + "std_delta_ms": 0.174, + "weight": 24.55 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.175, + "weight": 24.57 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.976, + "std_delta_ms": 0.175, + "weight": 24.59 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.175, + "weight": 24.61 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.176, + "weight": 24.63 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.176, + "weight": 24.64 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.176, + "weight": 24.66 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.176, + "weight": 24.68 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.176, + "weight": 24.69 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.196, + "std_delta_ms": 0.193, + "weight": 24.97 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.194, + "weight": 24.97 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.195, + "weight": 24.96 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.213, + "std_delta_ms": 0.196, + "weight": 24.97 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.197, + "weight": 24.97 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.226, + "std_delta_ms": 0.198, + "weight": 24.98 + }, + { + "source": "server.layer_5.q", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.198, + "weight": 24.98 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.236, + "std_delta_ms": 0.198, + "weight": 24.99 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.241, + "std_delta_ms": 0.199, + "weight": 25.0 + }, + { + "source": "server.layer_5.q", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.412, + "std_delta_ms": 0.167, + "weight": 25.05 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_5.v", + "count": 58, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.72, + "weight": 28.41 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_5", + "count": 58, + "mean_delta_ms": 0.697, + "std_delta_ms": 0.72, + "weight": 28.54 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_5.keys", + "count": 58, + "mean_delta_ms": 0.703, + "std_delta_ms": 0.72, + "weight": 28.65 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_5.values", + "count": 58, + "mean_delta_ms": 0.708, + "std_delta_ms": 0.72, + "weight": 28.76 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_5.ffn_up", + "count": 58, + "mean_delta_ms": 0.713, + "std_delta_ms": 0.72, + "weight": 28.86 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.718, + "std_delta_ms": 0.72, + "weight": 28.96 + }, + { + "source": "server.layer_5.k", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.868, + "std_delta_ms": 0.694, + "weight": 33.35 + }, + { + "source": "server.layer_5.k", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.873, + "std_delta_ms": 0.692, + "weight": 31.23 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.694, + "weight": 30.3 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.694, + "weight": 30.38 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.694, + "weight": 30.46 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_0.v", + "count": 53, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.685, + "weight": 29.89 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_0", + "count": 53, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.685, + "weight": 29.99 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_0.keys", + "count": 53, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.684, + "weight": 30.08 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_0.values", + "count": 52, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.674, + "weight": 29.48 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_0.ffn_up", + "count": 52, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.674, + "weight": 29.55 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_0.ffn_down", + "count": 52, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.674, + "weight": 29.62 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_1", + "count": 49, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.649, + "weight": 29.84 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_1.q", + "count": 48, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.64, + "weight": 29.23 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_1.k", + "count": 48, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.64, + "weight": 29.29 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_1.v", + "count": 48, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.64, + "weight": 29.35 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_1", + "count": 48, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.639, + "weight": 29.43 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_1.keys", + "count": 47, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.63, + "weight": 28.81 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_1.values", + "count": 47, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.63, + "weight": 28.87 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_1.ffn_up", + "count": 47, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.63, + "weight": 28.92 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_1.ffn_down", + "count": 47, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.63, + "weight": 28.98 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_2", + "count": 39, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.542, + "weight": 25.57 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_2.q", + "count": 39, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.542, + "weight": 25.63 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_2.k", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.542, + "weight": 25.67 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_2.v", + "count": 39, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.541, + "weight": 25.72 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_2", + "count": 38, + "mean_delta_ms": 1.03, + "std_delta_ms": 0.526, + "weight": 25.16 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_2.keys", + "count": 38, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.526, + "weight": 25.2 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_2.values", + "count": 37, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.508, + "weight": 24.66 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_2.ffn_up", + "count": 37, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.508, + "weight": 24.7 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_2.ffn_down", + "count": 36, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.485, + "weight": 24.29 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.174, + "weight": 24.54 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.175, + "weight": 24.55 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.97, + "std_delta_ms": 0.175, + "weight": 24.57 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.977, + "std_delta_ms": 0.175, + "weight": 24.59 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.175, + "weight": 24.61 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.988, + "std_delta_ms": 0.175, + "weight": 24.63 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.993, + "std_delta_ms": 0.176, + "weight": 24.64 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.176, + "weight": 24.66 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 1.004, + "std_delta_ms": 0.176, + "weight": 24.68 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.19, + "std_delta_ms": 0.193, + "weight": 24.96 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.194, + "weight": 24.96 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.195, + "weight": 24.95 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.208, + "std_delta_ms": 0.195, + "weight": 24.96 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.197, + "weight": 24.96 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.197, + "weight": 24.96 + }, + { + "source": "server.layer_5.k", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.226, + "std_delta_ms": 0.198, + "weight": 24.96 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.198, + "weight": 24.98 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.236, + "std_delta_ms": 0.198, + "weight": 24.99 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.407, + "std_delta_ms": 0.166, + "weight": 25.04 + }, + { + "source": "server.layer_5.k", + "target": "server.layer_5.q", + "count": 28, + "mean_delta_ms": 1.416, + "std_delta_ms": 0.169, + "weight": 25.02 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_5", + "count": 58, + "mean_delta_ms": 0.692, + "std_delta_ms": 0.72, + "weight": 28.44 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_5.keys", + "count": 58, + "mean_delta_ms": 0.698, + "std_delta_ms": 0.72, + "weight": 28.55 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_5.values", + "count": 58, + "mean_delta_ms": 0.703, + "std_delta_ms": 0.72, + "weight": 28.66 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_5.ffn_up", + "count": 58, + "mean_delta_ms": 0.708, + "std_delta_ms": 0.72, + "weight": 28.76 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.713, + "std_delta_ms": 0.72, + "weight": 28.86 + }, + { + "source": "server.layer_5.v", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.864, + "std_delta_ms": 0.694, + "weight": 33.26 + }, + { + "source": "server.layer_5.v", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.868, + "std_delta_ms": 0.692, + "weight": 31.15 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.883, + "std_delta_ms": 0.694, + "weight": 30.23 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.694, + "weight": 30.31 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.694, + "weight": 30.38 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_0.v", + "count": 53, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.684, + "weight": 29.82 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_0", + "count": 53, + "mean_delta_ms": 0.888, + "std_delta_ms": 0.685, + "weight": 29.92 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_0.keys", + "count": 53, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.684, + "weight": 30.01 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_0.values", + "count": 53, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.684, + "weight": 30.08 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_0.ffn_up", + "count": 52, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.673, + "weight": 29.48 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_0.ffn_down", + "count": 52, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.673, + "weight": 29.55 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_1", + "count": 49, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.649, + "weight": 29.78 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_1.q", + "count": 49, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.649, + "weight": 29.86 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_1.k", + "count": 48, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.64, + "weight": 29.24 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_1.v", + "count": 48, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.64, + "weight": 29.29 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_1", + "count": 48, + "mean_delta_ms": 1.009, + "std_delta_ms": 0.639, + "weight": 29.38 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_1.keys", + "count": 48, + "mean_delta_ms": 1.014, + "std_delta_ms": 0.639, + "weight": 29.44 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_1.values", + "count": 47, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.63, + "weight": 28.82 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_1.ffn_up", + "count": 47, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.63, + "weight": 28.87 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_1.ffn_down", + "count": 47, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.63, + "weight": 28.93 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_2", + "count": 39, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.542, + "weight": 25.52 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_2.q", + "count": 39, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.542, + "weight": 25.58 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_2.k", + "count": 39, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.542, + "weight": 25.63 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_2.v", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.542, + "weight": 25.68 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_2", + "count": 39, + "mean_delta_ms": 1.05, + "std_delta_ms": 0.541, + "weight": 25.73 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_2.keys", + "count": 38, + "mean_delta_ms": 1.031, + "std_delta_ms": 0.526, + "weight": 25.16 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_2.values", + "count": 38, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.526, + "weight": 25.21 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_2.ffn_up", + "count": 37, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.508, + "weight": 24.66 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_2.ffn_down", + "count": 37, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.505, + "weight": 24.81 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.174, + "weight": 24.52 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.96, + "std_delta_ms": 0.175, + "weight": 24.53 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.965, + "std_delta_ms": 0.175, + "weight": 24.56 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.972, + "std_delta_ms": 0.175, + "weight": 24.57 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.978, + "std_delta_ms": 0.175, + "weight": 24.59 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.983, + "std_delta_ms": 0.175, + "weight": 24.61 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.176, + "weight": 24.63 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 0.994, + "std_delta_ms": 0.176, + "weight": 24.65 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 0.999, + "std_delta_ms": 0.176, + "weight": 24.66 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.193, + "weight": 24.95 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.194, + "weight": 24.95 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.195, + "weight": 24.94 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.195, + "weight": 24.95 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.197, + "weight": 24.94 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.197, + "weight": 24.95 + }, + { + "source": "server.layer_5.v", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.221, + "std_delta_ms": 0.198, + "weight": 24.95 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.226, + "std_delta_ms": 0.198, + "weight": 24.97 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.231, + "std_delta_ms": 0.198, + "weight": 24.98 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.402, + "std_delta_ms": 0.166, + "weight": 25.03 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_5.q", + "count": 28, + "mean_delta_ms": 1.411, + "std_delta_ms": 0.169, + "weight": 25.01 + }, + { + "source": "server.layer_5.v", + "target": "server.layer_5.k", + "count": 28, + "mean_delta_ms": 1.416, + "std_delta_ms": 0.169, + "weight": 25.01 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_5.keys", + "count": 58, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.72, + "weight": 28.42 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_5.values", + "count": 58, + "mean_delta_ms": 0.697, + "std_delta_ms": 0.72, + "weight": 28.53 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_5.ffn_up", + "count": 58, + "mean_delta_ms": 0.701, + "std_delta_ms": 0.72, + "weight": 28.63 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.706, + "std_delta_ms": 0.72, + "weight": 28.73 + }, + { + "source": "server.kv_cache_5", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.857, + "std_delta_ms": 0.694, + "weight": 33.15 + }, + { + "source": "server.kv_cache_5", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.861, + "std_delta_ms": 0.692, + "weight": 31.05 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.694, + "weight": 30.13 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.882, + "std_delta_ms": 0.694, + "weight": 30.22 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.694, + "weight": 30.29 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_0.v", + "count": 53, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.684, + "weight": 29.73 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_0", + "count": 53, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.684, + "weight": 29.83 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_0.keys", + "count": 53, + "mean_delta_ms": 0.887, + "std_delta_ms": 0.684, + "weight": 29.92 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_0.values", + "count": 53, + "mean_delta_ms": 0.892, + "std_delta_ms": 0.684, + "weight": 29.99 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_0.ffn_up", + "count": 53, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.684, + "weight": 30.06 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_0.ffn_down", + "count": 53, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.684, + "weight": 30.13 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_1", + "count": 50, + "mean_delta_ms": 1.019, + "std_delta_ms": 0.657, + "weight": 30.39 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_1.q", + "count": 50, + "mean_delta_ms": 1.026, + "std_delta_ms": 0.657, + "weight": 30.47 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_1.k", + "count": 49, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.649, + "weight": 29.85 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_1.v", + "count": 49, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.649, + "weight": 29.9 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_1", + "count": 48, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.639, + "weight": 29.3 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_1.keys", + "count": 48, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.639, + "weight": 29.36 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_1.values", + "count": 48, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.639, + "weight": 29.42 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_1.ffn_up", + "count": 47, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.63, + "weight": 28.8 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_1.ffn_down", + "count": 47, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.63, + "weight": 28.86 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_2", + "count": 40, + "mean_delta_ms": 1.045, + "std_delta_ms": 0.557, + "weight": 26.1 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_2.q", + "count": 39, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.542, + "weight": 25.53 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_2.k", + "count": 39, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.542, + "weight": 25.58 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_2.v", + "count": 39, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.542, + "weight": 25.63 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_2", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.541, + "weight": 25.68 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_2.keys", + "count": 39, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.541, + "weight": 25.73 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_2.values", + "count": 38, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.526, + "weight": 25.15 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_2.ffn_up", + "count": 38, + "mean_delta_ms": 1.034, + "std_delta_ms": 0.526, + "weight": 25.2 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_2.ffn_down", + "count": 37, + "mean_delta_ms": 1.021, + "std_delta_ms": 0.505, + "weight": 24.76 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.174, + "weight": 24.5 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.174, + "weight": 24.51 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.959, + "std_delta_ms": 0.174, + "weight": 24.54 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.175, + "weight": 24.56 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.972, + "std_delta_ms": 0.175, + "weight": 24.58 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.977, + "std_delta_ms": 0.175, + "weight": 24.59 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.175, + "weight": 24.61 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 0.987, + "std_delta_ms": 0.175, + "weight": 24.63 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 0.992, + "std_delta_ms": 0.175, + "weight": 24.64 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.179, + "std_delta_ms": 0.192, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.193, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.192, + "std_delta_ms": 0.195, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.197, + "std_delta_ms": 0.195, + "weight": 24.94 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.196, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.197, + "weight": 24.94 + }, + { + "source": "server.kv_cache_5", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.215, + "std_delta_ms": 0.198, + "weight": 24.94 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.22, + "std_delta_ms": 0.198, + "weight": 24.96 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.225, + "std_delta_ms": 0.198, + "weight": 24.97 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.395, + "std_delta_ms": 0.166, + "weight": 25.03 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_5.q", + "count": 28, + "mean_delta_ms": 1.404, + "std_delta_ms": 0.168, + "weight": 25.0 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_5.k", + "count": 28, + "mean_delta_ms": 1.41, + "std_delta_ms": 0.169, + "weight": 25.01 + }, + { + "source": "server.kv_cache_5", + "target": "server.layer_5.v", + "count": 28, + "mean_delta_ms": 1.415, + "std_delta_ms": 0.169, + "weight": 25.01 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_5.values", + "count": 58, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.72, + "weight": 28.41 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_5.ffn_up", + "count": 58, + "mean_delta_ms": 0.696, + "std_delta_ms": 0.72, + "weight": 28.51 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.701, + "std_delta_ms": 0.72, + "weight": 28.62 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.852, + "std_delta_ms": 0.694, + "weight": 33.06 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.856, + "std_delta_ms": 0.692, + "weight": 30.96 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.694, + "weight": 30.04 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.694, + "weight": 30.13 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.694, + "weight": 30.2 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_0.v", + "count": 53, + "mean_delta_ms": 0.869, + "std_delta_ms": 0.684, + "weight": 29.65 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_0", + "count": 53, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.684, + "weight": 29.75 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_0.keys", + "count": 53, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.684, + "weight": 29.84 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_0.values", + "count": 53, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.684, + "weight": 29.91 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_0.ffn_up", + "count": 53, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.684, + "weight": 29.98 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_0.ffn_down", + "count": 53, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.684, + "weight": 30.05 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_1", + "count": 50, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.657, + "weight": 30.33 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_1.q", + "count": 50, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.657, + "weight": 30.41 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_1.k", + "count": 50, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.657, + "weight": 30.47 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_1.v", + "count": 49, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.649, + "weight": 29.84 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_1", + "count": 48, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.639, + "weight": 29.24 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_1.keys", + "count": 48, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.639, + "weight": 29.3 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_1.values", + "count": 48, + "mean_delta_ms": 1.007, + "std_delta_ms": 0.639, + "weight": 29.36 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_1.ffn_up", + "count": 48, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.639, + "weight": 29.41 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_1.ffn_down", + "count": 48, + "mean_delta_ms": 1.016, + "std_delta_ms": 0.639, + "weight": 29.47 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_2", + "count": 41, + "mean_delta_ms": 1.063, + "std_delta_ms": 0.569, + "weight": 26.7 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_2.q", + "count": 40, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.556, + "weight": 26.11 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_2.k", + "count": 39, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.542, + "weight": 25.53 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_2.v", + "count": 39, + "mean_delta_ms": 1.032, + "std_delta_ms": 0.541, + "weight": 25.58 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_2", + "count": 39, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.541, + "weight": 25.64 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_2.keys", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.541, + "weight": 25.68 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_2.values", + "count": 39, + "mean_delta_ms": 1.049, + "std_delta_ms": 0.541, + "weight": 25.73 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_2.ffn_up", + "count": 38, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.525, + "weight": 25.15 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_2.ffn_down", + "count": 38, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.522, + "weight": 25.31 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.173, + "weight": 24.48 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.174, + "weight": 24.5 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.953, + "std_delta_ms": 0.174, + "weight": 24.52 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.96, + "std_delta_ms": 0.175, + "weight": 24.54 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.175, + "weight": 24.56 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.175, + "weight": 24.58 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.977, + "std_delta_ms": 0.175, + "weight": 24.59 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 0.982, + "std_delta_ms": 0.175, + "weight": 24.61 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 0.987, + "std_delta_ms": 0.175, + "weight": 24.63 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.173, + "std_delta_ms": 0.192, + "weight": 24.92 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.18, + "std_delta_ms": 0.193, + "weight": 24.92 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.194, + "weight": 24.92 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.195, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.196, + "weight": 24.92 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.203, + "std_delta_ms": 0.197, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.197, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.214, + "std_delta_ms": 0.197, + "weight": 24.94 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.219, + "std_delta_ms": 0.198, + "weight": 24.96 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.39, + "std_delta_ms": 0.166, + "weight": 25.02 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_5.q", + "count": 28, + "mean_delta_ms": 1.399, + "std_delta_ms": 0.168, + "weight": 25.0 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_5.k", + "count": 28, + "mean_delta_ms": 1.404, + "std_delta_ms": 0.169, + "weight": 25.0 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.layer_5.v", + "count": 28, + "mean_delta_ms": 1.409, + "std_delta_ms": 0.169, + "weight": 25.0 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.kv_cache_5", + "count": 28, + "mean_delta_ms": 1.415, + "std_delta_ms": 0.169, + "weight": 25.01 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_5.ffn_up", + "count": 58, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.72, + "weight": 28.4 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.696, + "std_delta_ms": 0.72, + "weight": 28.51 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.846, + "std_delta_ms": 0.694, + "weight": 32.97 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.851, + "std_delta_ms": 0.692, + "weight": 30.88 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.865, + "std_delta_ms": 0.694, + "weight": 29.97 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.694, + "weight": 30.05 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.694, + "weight": 30.13 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_0.v", + "count": 54, + "mean_delta_ms": 0.884, + "std_delta_ms": 0.695, + "weight": 30.24 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_0", + "count": 53, + "mean_delta_ms": 0.87, + "std_delta_ms": 0.684, + "weight": 29.67 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_0.keys", + "count": 53, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.684, + "weight": 29.76 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_0.values", + "count": 53, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.684, + "weight": 29.83 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_0.ffn_up", + "count": 53, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.684, + "weight": 29.9 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_0.ffn_down", + "count": 53, + "mean_delta_ms": 0.891, + "std_delta_ms": 0.684, + "weight": 29.97 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_1", + "count": 50, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.657, + "weight": 30.27 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_1.q", + "count": 50, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.657, + "weight": 30.35 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_1.k", + "count": 50, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.657, + "weight": 30.41 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_1.v", + "count": 50, + "mean_delta_ms": 1.025, + "std_delta_ms": 0.657, + "weight": 30.47 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_1", + "count": 49, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.649, + "weight": 29.86 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_1.keys", + "count": 48, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.639, + "weight": 29.24 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_1.values", + "count": 48, + "mean_delta_ms": 1.002, + "std_delta_ms": 0.639, + "weight": 29.3 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_1.ffn_up", + "count": 48, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.639, + "weight": 29.35 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_1.ffn_down", + "count": 48, + "mean_delta_ms": 1.011, + "std_delta_ms": 0.639, + "weight": 29.41 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_2", + "count": 41, + "mean_delta_ms": 1.057, + "std_delta_ms": 0.569, + "weight": 26.65 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_2.q", + "count": 40, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.556, + "weight": 26.07 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_2.k", + "count": 40, + "mean_delta_ms": 1.046, + "std_delta_ms": 0.556, + "weight": 26.11 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_2.v", + "count": 39, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.542, + "weight": 25.54 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_2", + "count": 39, + "mean_delta_ms": 1.033, + "std_delta_ms": 0.541, + "weight": 25.59 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_2.keys", + "count": 39, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.541, + "weight": 25.64 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_2.values", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.541, + "weight": 25.68 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_2.ffn_up", + "count": 39, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.541, + "weight": 25.72 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_2.ffn_down", + "count": 38, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.522, + "weight": 25.27 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.935, + "std_delta_ms": 0.173, + "weight": 24.46 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.942, + "std_delta_ms": 0.174, + "weight": 24.48 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.948, + "std_delta_ms": 0.174, + "weight": 24.5 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.955, + "std_delta_ms": 0.175, + "weight": 24.52 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.175, + "weight": 24.54 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.175, + "weight": 24.56 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.175, + "weight": 24.57 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 0.976, + "std_delta_ms": 0.175, + "weight": 24.59 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.175, + "weight": 24.61 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.168, + "std_delta_ms": 0.192, + "weight": 24.91 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.193, + "weight": 24.91 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.194, + "weight": 24.9 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.186, + "std_delta_ms": 0.195, + "weight": 24.91 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.196, + "weight": 24.9 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.198, + "std_delta_ms": 0.197, + "weight": 24.91 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.197, + "weight": 24.91 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.197, + "weight": 24.93 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.214, + "std_delta_ms": 0.198, + "weight": 24.94 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.385, + "std_delta_ms": 0.166, + "weight": 25.01 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_5.q", + "count": 28, + "mean_delta_ms": 1.394, + "std_delta_ms": 0.168, + "weight": 24.99 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_5.k", + "count": 28, + "mean_delta_ms": 1.399, + "std_delta_ms": 0.169, + "weight": 24.99 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.layer_5.v", + "count": 28, + "mean_delta_ms": 1.404, + "std_delta_ms": 0.169, + "weight": 24.99 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_5", + "count": 28, + "mean_delta_ms": 1.41, + "std_delta_ms": 0.169, + "weight": 25.0 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.kv_cache_5.keys", + "count": 28, + "mean_delta_ms": 1.415, + "std_delta_ms": 0.17, + "weight": 25.0 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_5.ffn_down", + "count": 58, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.72, + "weight": 28.41 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.841, + "std_delta_ms": 0.694, + "weight": 32.88 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.846, + "std_delta_ms": 0.692, + "weight": 30.8 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.86, + "std_delta_ms": 0.694, + "weight": 29.89 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.866, + "std_delta_ms": 0.694, + "weight": 29.98 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.694, + "weight": 30.05 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_0.v", + "count": 54, + "mean_delta_ms": 0.88, + "std_delta_ms": 0.695, + "weight": 30.17 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_0", + "count": 53, + "mean_delta_ms": 0.865, + "std_delta_ms": 0.684, + "weight": 29.6 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_0.keys", + "count": 53, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.684, + "weight": 29.69 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_0.values", + "count": 53, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.684, + "weight": 29.76 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_0.ffn_up", + "count": 53, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.684, + "weight": 29.83 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_0.ffn_down", + "count": 53, + "mean_delta_ms": 0.886, + "std_delta_ms": 0.684, + "weight": 29.9 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_1", + "count": 50, + "mean_delta_ms": 1.003, + "std_delta_ms": 0.657, + "weight": 30.21 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_1.q", + "count": 50, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.657, + "weight": 30.29 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_1.k", + "count": 50, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.657, + "weight": 30.35 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_1.v", + "count": 50, + "mean_delta_ms": 1.02, + "std_delta_ms": 0.657, + "weight": 30.41 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_1", + "count": 50, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.657, + "weight": 30.49 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_1.keys", + "count": 49, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.648, + "weight": 29.87 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_1.values", + "count": 48, + "mean_delta_ms": 0.997, + "std_delta_ms": 0.639, + "weight": 29.24 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_1.ffn_up", + "count": 48, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.639, + "weight": 29.3 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_1.ffn_down", + "count": 48, + "mean_delta_ms": 1.006, + "std_delta_ms": 0.639, + "weight": 29.36 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_2", + "count": 42, + "mean_delta_ms": 1.075, + "std_delta_ms": 0.581, + "weight": 27.27 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_2.q", + "count": 41, + "mean_delta_ms": 1.059, + "std_delta_ms": 0.569, + "weight": 26.67 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_2.k", + "count": 40, + "mean_delta_ms": 1.041, + "std_delta_ms": 0.556, + "weight": 26.07 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_2.v", + "count": 40, + "mean_delta_ms": 1.047, + "std_delta_ms": 0.556, + "weight": 26.12 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_2", + "count": 39, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.541, + "weight": 25.55 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_2.keys", + "count": 39, + "mean_delta_ms": 1.034, + "std_delta_ms": 0.541, + "weight": 25.6 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_2.values", + "count": 39, + "mean_delta_ms": 1.039, + "std_delta_ms": 0.541, + "weight": 25.64 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_2.ffn_up", + "count": 39, + "mean_delta_ms": 1.044, + "std_delta_ms": 0.541, + "weight": 25.68 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_2.ffn_down", + "count": 39, + "mean_delta_ms": 1.056, + "std_delta_ms": 0.538, + "weight": 25.84 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.93, + "std_delta_ms": 0.173, + "weight": 24.44 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.174, + "weight": 24.46 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.943, + "std_delta_ms": 0.174, + "weight": 24.48 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.95, + "std_delta_ms": 0.174, + "weight": 24.5 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.175, + "weight": 24.52 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.175, + "weight": 24.54 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.175, + "weight": 24.56 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.175, + "weight": 24.58 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 0.977, + "std_delta_ms": 0.175, + "weight": 24.59 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.163, + "std_delta_ms": 0.192, + "weight": 24.89 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.17, + "std_delta_ms": 0.193, + "weight": 24.9 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.194, + "weight": 24.89 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.195, + "weight": 24.9 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.196, + "weight": 24.89 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.196, + "weight": 24.9 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.199, + "std_delta_ms": 0.197, + "weight": 24.9 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.197, + "weight": 24.92 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.209, + "std_delta_ms": 0.197, + "weight": 24.93 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.38, + "std_delta_ms": 0.166, + "weight": 25.0 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_5.q", + "count": 28, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.168, + "weight": 24.98 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_5.k", + "count": 28, + "mean_delta_ms": 1.394, + "std_delta_ms": 0.169, + "weight": 24.98 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.layer_5.v", + "count": 28, + "mean_delta_ms": 1.399, + "std_delta_ms": 0.169, + "weight": 24.99 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_5", + "count": 28, + "mean_delta_ms": 1.405, + "std_delta_ms": 0.169, + "weight": 24.99 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_5.keys", + "count": 28, + "mean_delta_ms": 1.411, + "std_delta_ms": 0.17, + "weight": 24.99 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.kv_cache_5.values", + "count": 28, + "mean_delta_ms": 1.416, + "std_delta_ms": 0.17, + "weight": 25.0 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.buffer", + "count": 60, + "mean_delta_ms": 0.836, + "std_delta_ms": 0.694, + "weight": 32.79 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.buffer.logits", + "count": 56, + "mean_delta_ms": 0.841, + "std_delta_ms": 0.692, + "weight": 30.72 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_0", + "count": 54, + "mean_delta_ms": 0.855, + "std_delta_ms": 0.694, + "weight": 29.81 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_0.q", + "count": 54, + "mean_delta_ms": 0.861, + "std_delta_ms": 0.694, + "weight": 29.9 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_0.k", + "count": 54, + "mean_delta_ms": 0.866, + "std_delta_ms": 0.694, + "weight": 29.97 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_0.v", + "count": 54, + "mean_delta_ms": 0.874, + "std_delta_ms": 0.695, + "weight": 30.09 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_0", + "count": 54, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.695, + "weight": 30.19 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_0.keys", + "count": 53, + "mean_delta_ms": 0.866, + "std_delta_ms": 0.684, + "weight": 29.61 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_0.values", + "count": 53, + "mean_delta_ms": 0.871, + "std_delta_ms": 0.684, + "weight": 29.68 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_0.ffn_up", + "count": 53, + "mean_delta_ms": 0.876, + "std_delta_ms": 0.684, + "weight": 29.75 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_0.ffn_down", + "count": 53, + "mean_delta_ms": 0.881, + "std_delta_ms": 0.684, + "weight": 29.83 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_1", + "count": 50, + "mean_delta_ms": 0.998, + "std_delta_ms": 0.657, + "weight": 30.15 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_1.q", + "count": 50, + "mean_delta_ms": 1.005, + "std_delta_ms": 0.657, + "weight": 30.23 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_1.k", + "count": 50, + "mean_delta_ms": 1.01, + "std_delta_ms": 0.657, + "weight": 30.29 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_1.v", + "count": 50, + "mean_delta_ms": 1.015, + "std_delta_ms": 0.657, + "weight": 30.35 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_1", + "count": 50, + "mean_delta_ms": 1.022, + "std_delta_ms": 0.657, + "weight": 30.43 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_1.keys", + "count": 50, + "mean_delta_ms": 1.027, + "std_delta_ms": 0.657, + "weight": 30.5 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_1.values", + "count": 49, + "mean_delta_ms": 1.012, + "std_delta_ms": 0.648, + "weight": 29.87 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_1.ffn_up", + "count": 48, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.639, + "weight": 29.24 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_1.ffn_down", + "count": 48, + "mean_delta_ms": 1.001, + "std_delta_ms": 0.639, + "weight": 29.3 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_2", + "count": 44, + "mean_delta_ms": 1.112, + "std_delta_ms": 0.599, + "weight": 28.59 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_2.q", + "count": 41, + "mean_delta_ms": 1.054, + "std_delta_ms": 0.569, + "weight": 26.63 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_2.k", + "count": 41, + "mean_delta_ms": 1.06, + "std_delta_ms": 0.569, + "weight": 26.67 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_2.v", + "count": 40, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.556, + "weight": 26.08 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_2", + "count": 40, + "mean_delta_ms": 1.048, + "std_delta_ms": 0.556, + "weight": 26.13 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_2.keys", + "count": 39, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.541, + "weight": 25.55 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_2.values", + "count": 39, + "mean_delta_ms": 1.034, + "std_delta_ms": 0.541, + "weight": 25.6 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_2.ffn_up", + "count": 39, + "mean_delta_ms": 1.038, + "std_delta_ms": 0.541, + "weight": 25.64 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_2.ffn_down", + "count": 39, + "mean_delta_ms": 1.051, + "std_delta_ms": 0.538, + "weight": 25.8 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_3", + "count": 29, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.173, + "weight": 24.43 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_3.q", + "count": 29, + "mean_delta_ms": 0.932, + "std_delta_ms": 0.174, + "weight": 24.45 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_3.k", + "count": 29, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.174, + "weight": 24.47 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_3.v", + "count": 29, + "mean_delta_ms": 0.945, + "std_delta_ms": 0.174, + "weight": 24.49 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_3", + "count": 29, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.174, + "weight": 24.51 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_3.keys", + "count": 29, + "mean_delta_ms": 0.956, + "std_delta_ms": 0.174, + "weight": 24.53 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_3.values", + "count": 29, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.174, + "weight": 24.55 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_3.ffn_up", + "count": 29, + "mean_delta_ms": 0.966, + "std_delta_ms": 0.174, + "weight": 24.57 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_3.ffn_down", + "count": 29, + "mean_delta_ms": 0.971, + "std_delta_ms": 0.175, + "weight": 24.58 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 1.158, + "std_delta_ms": 0.192, + "weight": 24.88 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 1.165, + "std_delta_ms": 0.192, + "weight": 24.89 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.194, + "weight": 24.88 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.194, + "weight": 24.89 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 1.183, + "std_delta_ms": 0.196, + "weight": 24.88 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.196, + "weight": 24.89 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 1.194, + "std_delta_ms": 0.197, + "weight": 24.89 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 1.199, + "std_delta_ms": 0.197, + "weight": 24.91 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 1.204, + "std_delta_ms": 0.197, + "weight": 24.92 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_5", + "count": 28, + "mean_delta_ms": 1.375, + "std_delta_ms": 0.165, + "weight": 24.99 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_5.q", + "count": 28, + "mean_delta_ms": 1.384, + "std_delta_ms": 0.168, + "weight": 24.97 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_5.k", + "count": 28, + "mean_delta_ms": 1.389, + "std_delta_ms": 0.168, + "weight": 24.97 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_5.v", + "count": 28, + "mean_delta_ms": 1.394, + "std_delta_ms": 0.169, + "weight": 24.98 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_5", + "count": 28, + "mean_delta_ms": 1.4, + "std_delta_ms": 0.169, + "weight": 24.98 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_5.keys", + "count": 28, + "mean_delta_ms": 1.406, + "std_delta_ms": 0.169, + "weight": 24.99 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.kv_cache_5.values", + "count": 28, + "mean_delta_ms": 1.411, + "std_delta_ms": 0.17, + "weight": 24.99 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.layer_5.ffn_up", + "count": 28, + "mean_delta_ms": 1.415, + "std_delta_ms": 0.17, + "weight": 25.0 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_0", + "count": 55, + "mean_delta_ms": 0.672, + "std_delta_ms": 0.701, + "weight": 26.92 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_0.q", + "count": 55, + "mean_delta_ms": 0.678, + "std_delta_ms": 0.701, + "weight": 27.04 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_0.k", + "count": 55, + "mean_delta_ms": 0.683, + "std_delta_ms": 0.701, + "weight": 27.14 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_0.v", + "count": 55, + "mean_delta_ms": 0.691, + "std_delta_ms": 0.702, + "weight": 27.29 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_0", + "count": 55, + "mean_delta_ms": 0.698, + "std_delta_ms": 0.702, + "weight": 27.43 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_0.keys", + "count": 55, + "mean_delta_ms": 0.704, + "std_delta_ms": 0.701, + "weight": 27.55 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_0.values", + "count": 55, + "mean_delta_ms": 0.709, + "std_delta_ms": 0.701, + "weight": 27.64 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_0.ffn_up", + "count": 55, + "mean_delta_ms": 0.713, + "std_delta_ms": 0.701, + "weight": 27.73 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_0.ffn_down", + "count": 55, + "mean_delta_ms": 0.718, + "std_delta_ms": 0.701, + "weight": 27.83 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_1", + "count": 55, + "mean_delta_ms": 0.896, + "std_delta_ms": 0.701, + "weight": 30.86 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_1.q", + "count": 55, + "mean_delta_ms": 0.903, + "std_delta_ms": 0.701, + "weight": 30.96 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_1.k", + "count": 55, + "mean_delta_ms": 0.908, + "std_delta_ms": 0.701, + "weight": 31.04 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_1.v", + "count": 54, + "mean_delta_ms": 0.893, + "std_delta_ms": 0.691, + "weight": 30.43 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_1", + "count": 54, + "mean_delta_ms": 0.9, + "std_delta_ms": 0.691, + "weight": 30.54 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_1.keys", + "count": 54, + "mean_delta_ms": 0.905, + "std_delta_ms": 0.691, + "weight": 30.62 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_1.values", + "count": 54, + "mean_delta_ms": 0.91, + "std_delta_ms": 0.691, + "weight": 30.69 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_1.ffn_up", + "count": 54, + "mean_delta_ms": 0.914, + "std_delta_ms": 0.691, + "weight": 30.76 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_1.ffn_down", + "count": 54, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.691, + "weight": 30.83 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_2", + "count": 48, + "mean_delta_ms": 1.0, + "std_delta_ms": 0.644, + "weight": 29.19 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_2.q", + "count": 47, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.634, + "weight": 28.6 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_2.k", + "count": 47, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.634, + "weight": 28.66 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_2.v", + "count": 46, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.623, + "weight": 28.06 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_2", + "count": 46, + "mean_delta_ms": 0.981, + "std_delta_ms": 0.623, + "weight": 28.13 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_2.keys", + "count": 46, + "mean_delta_ms": 0.986, + "std_delta_ms": 0.623, + "weight": 28.19 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_2.values", + "count": 46, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.623, + "weight": 28.24 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_2.ffn_up", + "count": 46, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.623, + "weight": 28.3 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_2.ffn_down", + "count": 45, + "mean_delta_ms": 0.989, + "std_delta_ms": 0.614, + "weight": 27.77 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_3", + "count": 39, + "mean_delta_ms": 1.028, + "std_delta_ms": 0.539, + "weight": 25.6 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_3.q", + "count": 39, + "mean_delta_ms": 1.036, + "std_delta_ms": 0.538, + "weight": 25.67 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_3.k", + "count": 39, + "mean_delta_ms": 1.042, + "std_delta_ms": 0.538, + "weight": 25.71 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_3.v", + "count": 38, + "mean_delta_ms": 1.023, + "std_delta_ms": 0.522, + "weight": 25.16 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_3", + "count": 38, + "mean_delta_ms": 1.029, + "std_delta_ms": 0.521, + "weight": 25.22 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_3.keys", + "count": 37, + "mean_delta_ms": 1.008, + "std_delta_ms": 0.503, + "weight": 24.68 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_3.values", + "count": 37, + "mean_delta_ms": 1.013, + "std_delta_ms": 0.503, + "weight": 24.72 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_3.ffn_up", + "count": 36, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.482, + "weight": 24.22 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_3.ffn_down", + "count": 36, + "mean_delta_ms": 0.995, + "std_delta_ms": 0.482, + "weight": 24.26 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_4", + "count": 29, + "mean_delta_ms": 0.95, + "std_delta_ms": 0.127, + "weight": 25.58 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_4.q", + "count": 29, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.128, + "weight": 25.57 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_4.k", + "count": 29, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.13, + "weight": 25.54 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_4.v", + "count": 29, + "mean_delta_ms": 0.968, + "std_delta_ms": 0.131, + "weight": 25.54 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_4", + "count": 29, + "mean_delta_ms": 0.974, + "std_delta_ms": 0.131, + "weight": 25.56 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_4.keys", + "count": 29, + "mean_delta_ms": 0.98, + "std_delta_ms": 0.132, + "weight": 25.57 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_4.values", + "count": 29, + "mean_delta_ms": 0.985, + "std_delta_ms": 0.132, + "weight": 25.57 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_4.ffn_up", + "count": 29, + "mean_delta_ms": 0.991, + "std_delta_ms": 0.132, + "weight": 25.58 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_4.ffn_down", + "count": 29, + "mean_delta_ms": 0.996, + "std_delta_ms": 0.133, + "weight": 25.59 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_5", + "count": 29, + "mean_delta_ms": 1.191, + "std_delta_ms": 0.153, + "weight": 25.71 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_5.q", + "count": 29, + "mean_delta_ms": 1.2, + "std_delta_ms": 0.155, + "weight": 25.68 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_5.k", + "count": 29, + "mean_delta_ms": 1.205, + "std_delta_ms": 0.155, + "weight": 25.69 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_5.v", + "count": 29, + "mean_delta_ms": 1.21, + "std_delta_ms": 0.156, + "weight": 25.7 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_5", + "count": 29, + "mean_delta_ms": 1.217, + "std_delta_ms": 0.156, + "weight": 25.7 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_5.keys", + "count": 29, + "mean_delta_ms": 1.222, + "std_delta_ms": 0.156, + "weight": 25.71 + }, + { + "source": "server.buffer.logits", + "target": "server.kv_cache_5.values", + "count": 29, + "mean_delta_ms": 1.227, + "std_delta_ms": 0.157, + "weight": 25.71 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_5.ffn_up", + "count": 29, + "mean_delta_ms": 1.232, + "std_delta_ms": 0.157, + "weight": 25.72 + }, + { + "source": "server.buffer.logits", + "target": "server.layer_5.ffn_down", + "count": 29, + "mean_delta_ms": 1.237, + "std_delta_ms": 0.157, + "weight": 25.72 + }, + { + "source": "server.buffer.logits", + "target": "server.buffer", + "count": 32, + "mean_delta_ms": 1.32, + "std_delta_ms": 0.368, + "weight": 25.02 + }, + { + "source": "server.layer_4", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.68, + "weight": 5.07 + }, + { + "source": "server.layer_4", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.175, + "std_delta_ms": 0.68, + "weight": 2.53 + }, + { + "source": "server.layer_4", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.68, + "weight": 2.54 + }, + { + "source": "server.layer_4", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.199, + "std_delta_ms": 0.68, + "weight": 2.55 + }, + { + "source": "server.layer_4.q", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.168, + "std_delta_ms": 0.681, + "weight": 5.05 + }, + { + "source": "server.layer_4.q", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.169, + "std_delta_ms": 0.681, + "weight": 2.53 + }, + { + "source": "server.layer_4.q", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.181, + "std_delta_ms": 0.681, + "weight": 2.54 + }, + { + "source": "server.layer_4.q", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.681, + "weight": 2.55 + }, + { + "source": "server.layer_4.k", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.163, + "std_delta_ms": 0.681, + "weight": 5.05 + }, + { + "source": "server.layer_4.k", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.164, + "std_delta_ms": 0.681, + "weight": 2.52 + }, + { + "source": "server.layer_4.k", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.681, + "weight": 2.53 + }, + { + "source": "server.layer_4.k", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.188, + "std_delta_ms": 0.681, + "weight": 2.54 + }, + { + "source": "server.layer_4.v", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.158, + "std_delta_ms": 0.681, + "weight": 5.04 + }, + { + "source": "server.layer_4.v", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.158, + "std_delta_ms": 0.681, + "weight": 2.52 + }, + { + "source": "server.layer_4.v", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.171, + "std_delta_ms": 0.681, + "weight": 2.53 + }, + { + "source": "server.layer_4.v", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.183, + "std_delta_ms": 0.681, + "weight": 2.54 + }, + { + "source": "server.kv_cache_4", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.151, + "std_delta_ms": 0.682, + "weight": 5.03 + }, + { + "source": "server.kv_cache_4", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.152, + "std_delta_ms": 0.681, + "weight": 2.51 + }, + { + "source": "server.kv_cache_4", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.164, + "std_delta_ms": 0.682, + "weight": 2.52 + }, + { + "source": "server.kv_cache_4", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.682, + "weight": 2.53 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.146, + "std_delta_ms": 0.682, + "weight": 5.02 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.146, + "std_delta_ms": 0.682, + "weight": 2.51 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.159, + "std_delta_ms": 0.682, + "weight": 2.52 + }, + { + "source": "server.kv_cache_4.keys", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.17, + "std_delta_ms": 0.682, + "weight": 2.53 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.141, + "std_delta_ms": 0.682, + "weight": 5.01 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.141, + "std_delta_ms": 0.682, + "weight": 2.5 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.154, + "std_delta_ms": 0.682, + "weight": 2.51 + }, + { + "source": "server.kv_cache_4.values", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.165, + "std_delta_ms": 0.682, + "weight": 2.52 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.136, + "std_delta_ms": 0.682, + "weight": 5.0 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.136, + "std_delta_ms": 0.682, + "weight": 2.5 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.149, + "std_delta_ms": 0.682, + "weight": 2.51 + }, + { + "source": "server.layer_4.ffn_up", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.16, + "std_delta_ms": 0.682, + "weight": 2.52 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.config", + "count": 8, + "mean_delta_ms": 1.13, + "std_delta_ms": 0.683, + "weight": 4.99 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 1.131, + "std_delta_ms": 0.683, + "weight": 2.49 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 1.143, + "std_delta_ms": 0.683, + "weight": 2.5 + }, + { + "source": "server.layer_4.ffn_down", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 1.155, + "std_delta_ms": 0.683, + "weight": 2.51 + }, + { + "source": "server.layer_5", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.691, + "weight": 4.63 + }, + { + "source": "server.layer_5", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.691, + "weight": 2.32 + }, + { + "source": "server.layer_5", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.964, + "std_delta_ms": 0.691, + "weight": 2.33 + }, + { + "source": "server.layer_5", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.976, + "std_delta_ms": 0.691, + "weight": 2.34 + }, + { + "source": "server.layer_5.q", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.938, + "std_delta_ms": 0.698, + "weight": 4.59 + }, + { + "source": "server.layer_5.q", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.939, + "std_delta_ms": 0.697, + "weight": 2.29 + }, + { + "source": "server.layer_5.q", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.698, + "weight": 2.31 + }, + { + "source": "server.layer_5.q", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.698, + "weight": 2.32 + }, + { + "source": "server.layer_5.k", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.932, + "std_delta_ms": 0.698, + "weight": 4.57 + }, + { + "source": "server.layer_5.k", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.933, + "std_delta_ms": 0.698, + "weight": 2.29 + }, + { + "source": "server.layer_5.k", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.946, + "std_delta_ms": 0.698, + "weight": 2.3 + }, + { + "source": "server.layer_5.k", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.957, + "std_delta_ms": 0.698, + "weight": 2.31 + }, + { + "source": "server.layer_5.v", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.927, + "std_delta_ms": 0.699, + "weight": 4.56 + }, + { + "source": "server.layer_5.v", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.928, + "std_delta_ms": 0.699, + "weight": 2.28 + }, + { + "source": "server.layer_5.v", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.94, + "std_delta_ms": 0.699, + "weight": 2.29 + }, + { + "source": "server.layer_5.v", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.952, + "std_delta_ms": 0.699, + "weight": 2.31 + }, + { + "source": "server.kv_cache_5", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.92, + "std_delta_ms": 0.699, + "weight": 4.55 + }, + { + "source": "server.kv_cache_5", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.921, + "std_delta_ms": 0.699, + "weight": 2.27 + }, + { + "source": "server.kv_cache_5", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.699, + "weight": 2.29 + }, + { + "source": "server.kv_cache_5", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.945, + "std_delta_ms": 0.699, + "weight": 2.3 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.915, + "std_delta_ms": 0.7, + "weight": 4.53 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.915, + "std_delta_ms": 0.7, + "weight": 2.27 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.928, + "std_delta_ms": 0.7, + "weight": 2.28 + }, + { + "source": "server.kv_cache_5.keys", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.939, + "std_delta_ms": 0.7, + "weight": 2.29 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.909, + "std_delta_ms": 0.7, + "weight": 4.52 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.91, + "std_delta_ms": 0.7, + "weight": 2.26 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.922, + "std_delta_ms": 0.7, + "weight": 2.27 + }, + { + "source": "server.kv_cache_5.values", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.934, + "std_delta_ms": 0.701, + "weight": 2.29 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.904, + "std_delta_ms": 0.701, + "weight": 4.51 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.904, + "std_delta_ms": 0.701, + "weight": 2.25 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.917, + "std_delta_ms": 0.701, + "weight": 2.27 + }, + { + "source": "server.layer_5.ffn_up", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.929, + "std_delta_ms": 0.701, + "weight": 2.28 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.898, + "std_delta_ms": 0.701, + "weight": 4.49 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.899, + "std_delta_ms": 0.701, + "weight": 2.25 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.912, + "std_delta_ms": 0.701, + "weight": 2.26 + }, + { + "source": "server.layer_5.ffn_down", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.923, + "std_delta_ms": 0.701, + "weight": 2.27 + }, + { + "source": "server.buffer", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.725, + "std_delta_ms": 0.703, + "weight": 4.06 + }, + { + "source": "server.buffer", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.726, + "std_delta_ms": 0.703, + "weight": 2.03 + }, + { + "source": "server.buffer", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.738, + "std_delta_ms": 0.703, + "weight": 2.05 + }, + { + "source": "server.buffer.logits", + "target": "server.config", + "count": 8, + "mean_delta_ms": 0.717, + "std_delta_ms": 0.704, + "weight": 4.04 + }, + { + "source": "server.buffer.logits", + "target": "server.config.max_tokens", + "count": 4, + "mean_delta_ms": 0.718, + "std_delta_ms": 0.704, + "weight": 2.02 + }, + { + "source": "server.buffer.logits", + "target": "server.config.temperature", + "count": 4, + "mean_delta_ms": 0.73, + "std_delta_ms": 0.704, + "weight": 2.04 + }, + { + "source": "server.buffer.logits", + "target": "server.buffer.input_ids", + "count": 4, + "mean_delta_ms": 0.742, + "std_delta_ms": 0.704, + "weight": 2.05 + }, + { + "source": "server.layer_0", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.408, + "std_delta_ms": 0.079, + "weight": 3.79 + }, + { + "source": "server.layer_0", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.409, + "std_delta_ms": 0.079, + "weight": 1.89 + }, + { + "source": "server.layer_0", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.421, + "std_delta_ms": 0.08, + "weight": 1.89 + }, + { + "source": "server.layer_0", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.433, + "std_delta_ms": 0.081, + "weight": 1.89 + }, + { + "source": "server.layer_0.q", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.403, + "std_delta_ms": 0.079, + "weight": 3.79 + }, + { + "source": "server.layer_0.q", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.403, + "std_delta_ms": 0.078, + "weight": 1.89 + }, + { + "source": "server.layer_0.q", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.416, + "std_delta_ms": 0.079, + "weight": 1.89 + }, + { + "source": "server.layer_0.q", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.427, + "std_delta_ms": 0.081, + "weight": 1.89 + }, + { + "source": "server.layer_0.k", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.397, + "std_delta_ms": 0.078, + "weight": 3.79 + }, + { + "source": "server.layer_0.k", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.398, + "std_delta_ms": 0.078, + "weight": 1.89 + }, + { + "source": "server.layer_0.k", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.411, + "std_delta_ms": 0.079, + "weight": 1.89 + }, + { + "source": "server.layer_0.k", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.422, + "std_delta_ms": 0.08, + "weight": 1.89 + }, + { + "source": "server.layer_0.v", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.392, + "std_delta_ms": 0.077, + "weight": 3.79 + }, + { + "source": "server.layer_0.v", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.393, + "std_delta_ms": 0.077, + "weight": 1.9 + }, + { + "source": "server.layer_0.v", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.405, + "std_delta_ms": 0.078, + "weight": 1.89 + }, + { + "source": "server.layer_0.v", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.417, + "std_delta_ms": 0.08, + "weight": 1.89 + }, + { + "source": "server.kv_cache_0", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.077, + "weight": 3.79 + }, + { + "source": "server.kv_cache_0", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.386, + "std_delta_ms": 0.076, + "weight": 1.9 + }, + { + "source": "server.kv_cache_0", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.399, + "std_delta_ms": 0.077, + "weight": 1.9 + }, + { + "source": "server.kv_cache_0", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.41, + "std_delta_ms": 0.079, + "weight": 1.89 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.38, + "std_delta_ms": 0.076, + "weight": 3.79 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.381, + "std_delta_ms": 0.075, + "weight": 1.9 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.393, + "std_delta_ms": 0.076, + "weight": 1.9 + }, + { + "source": "server.kv_cache_0.keys", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.405, + "std_delta_ms": 0.078, + "weight": 1.89 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.375, + "std_delta_ms": 0.075, + "weight": 3.79 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.375, + "std_delta_ms": 0.075, + "weight": 1.9 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.076, + "weight": 1.9 + }, + { + "source": "server.kv_cache_0.values", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.399, + "std_delta_ms": 0.078, + "weight": 1.89 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.37, + "std_delta_ms": 0.075, + "weight": 3.79 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.371, + "std_delta_ms": 0.075, + "weight": 1.9 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.383, + "std_delta_ms": 0.076, + "weight": 1.9 + }, + { + "source": "server.layer_0.ffn_up", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.395, + "std_delta_ms": 0.077, + "weight": 1.89 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.363, + "std_delta_ms": 0.073, + "weight": 3.8 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.364, + "std_delta_ms": 0.073, + "weight": 1.9 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.376, + "std_delta_ms": 0.074, + "weight": 1.9 + }, + { + "source": "server.layer_0.ffn_down", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.388, + "std_delta_ms": 0.075, + "weight": 1.9 + }, + { + "source": "server.layer_1", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.176, + "std_delta_ms": 0.058, + "weight": 3.81 + }, + { + "source": "server.layer_1", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.177, + "std_delta_ms": 0.058, + "weight": 1.91 + }, + { + "source": "server.layer_1", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.189, + "std_delta_ms": 0.059, + "weight": 1.91 + }, + { + "source": "server.layer_1", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.201, + "std_delta_ms": 0.06, + "weight": 1.9 + }, + { + "source": "server.layer_1.q", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.169, + "std_delta_ms": 0.057, + "weight": 3.81 + }, + { + "source": "server.layer_1.q", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.169, + "std_delta_ms": 0.056, + "weight": 1.91 + }, + { + "source": "server.layer_1.q", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.182, + "std_delta_ms": 0.057, + "weight": 1.91 + }, + { + "source": "server.layer_1.q", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.193, + "std_delta_ms": 0.059, + "weight": 1.91 + }, + { + "source": "server.layer_1.k", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.161, + "std_delta_ms": 0.053, + "weight": 3.82 + }, + { + "source": "server.layer_1.k", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.161, + "std_delta_ms": 0.053, + "weight": 1.91 + }, + { + "source": "server.layer_1.k", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.174, + "std_delta_ms": 0.054, + "weight": 1.91 + }, + { + "source": "server.layer_1.k", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.185, + "std_delta_ms": 0.056, + "weight": 1.91 + }, + { + "source": "server.layer_1.v", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.155, + "std_delta_ms": 0.053, + "weight": 3.83 + }, + { + "source": "server.layer_1.v", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.156, + "std_delta_ms": 0.052, + "weight": 1.91 + }, + { + "source": "server.layer_1.v", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.168, + "std_delta_ms": 0.053, + "weight": 1.91 + }, + { + "source": "server.layer_1.v", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.18, + "std_delta_ms": 0.055, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.148, + "std_delta_ms": 0.052, + "weight": 3.83 + }, + { + "source": "server.kv_cache_1", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.149, + "std_delta_ms": 0.052, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.161, + "std_delta_ms": 0.053, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.173, + "std_delta_ms": 0.054, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.142, + "std_delta_ms": 0.052, + "weight": 3.83 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.142, + "std_delta_ms": 0.052, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.155, + "std_delta_ms": 0.053, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1.keys", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.167, + "std_delta_ms": 0.054, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.137, + "std_delta_ms": 0.052, + "weight": 3.83 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.137, + "std_delta_ms": 0.051, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.15, + "std_delta_ms": 0.052, + "weight": 1.91 + }, + { + "source": "server.kv_cache_1.values", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.161, + "std_delta_ms": 0.054, + "weight": 1.91 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.132, + "std_delta_ms": 0.051, + "weight": 3.83 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.132, + "std_delta_ms": 0.051, + "weight": 1.91 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.145, + "std_delta_ms": 0.052, + "weight": 1.91 + }, + { + "source": "server.layer_1.ffn_up", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.156, + "std_delta_ms": 0.053, + "weight": 1.91 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.config", + "count": 4, + "mean_delta_ms": 1.127, + "std_delta_ms": 0.051, + "weight": 3.83 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 1.127, + "std_delta_ms": 0.051, + "weight": 1.91 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 1.14, + "std_delta_ms": 0.052, + "weight": 1.91 + }, + { + "source": "server.layer_1.ffn_down", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 1.151, + "std_delta_ms": 0.053, + "weight": 1.91 + }, + { + "source": "server.layer_2", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.95, + "std_delta_ms": 0.047, + "weight": 3.81 + }, + { + "source": "server.layer_2", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.951, + "std_delta_ms": 0.047, + "weight": 1.91 + }, + { + "source": "server.layer_2", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.963, + "std_delta_ms": 0.048, + "weight": 1.91 + }, + { + "source": "server.layer_2", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.975, + "std_delta_ms": 0.049, + "weight": 1.9 + }, + { + "source": "server.layer_2.q", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.942, + "std_delta_ms": 0.046, + "weight": 3.81 + }, + { + "source": "server.layer_2.q", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.943, + "std_delta_ms": 0.046, + "weight": 1.91 + }, + { + "source": "server.layer_2.q", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.955, + "std_delta_ms": 0.047, + "weight": 1.91 + }, + { + "source": "server.layer_2.q", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.967, + "std_delta_ms": 0.048, + "weight": 1.9 + }, + { + "source": "server.layer_2.k", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.936, + "std_delta_ms": 0.046, + "weight": 3.81 + }, + { + "source": "server.layer_2.k", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.937, + "std_delta_ms": 0.045, + "weight": 1.91 + }, + { + "source": "server.layer_2.k", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.949, + "std_delta_ms": 0.046, + "weight": 1.91 + }, + { + "source": "server.layer_2.k", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.961, + "std_delta_ms": 0.048, + "weight": 1.91 + }, + { + "source": "server.layer_2.v", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.931, + "std_delta_ms": 0.045, + "weight": 3.81 + }, + { + "source": "server.layer_2.v", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.931, + "std_delta_ms": 0.045, + "weight": 1.91 + }, + { + "source": "server.layer_2.v", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.944, + "std_delta_ms": 0.046, + "weight": 1.91 + }, + { + "source": "server.layer_2.v", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.955, + "std_delta_ms": 0.047, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.924, + "std_delta_ms": 0.044, + "weight": 3.82 + }, + { + "source": "server.kv_cache_2", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.044, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.937, + "std_delta_ms": 0.045, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.949, + "std_delta_ms": 0.046, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.917, + "std_delta_ms": 0.043, + "weight": 3.82 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.918, + "std_delta_ms": 0.042, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.93, + "std_delta_ms": 0.043, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2.keys", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.942, + "std_delta_ms": 0.045, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.912, + "std_delta_ms": 0.042, + "weight": 3.82 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.042, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.043, + "weight": 1.91 + }, + { + "source": "server.kv_cache_2.values", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.937, + "std_delta_ms": 0.044, + "weight": 1.91 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.905, + "std_delta_ms": 0.044, + "weight": 3.82 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.906, + "std_delta_ms": 0.043, + "weight": 1.91 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.918, + "std_delta_ms": 0.044, + "weight": 1.91 + }, + { + "source": "server.layer_2.ffn_up", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.93, + "std_delta_ms": 0.046, + "weight": 1.91 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.config", + "count": 4, + "mean_delta_ms": 0.9, + "std_delta_ms": 0.043, + "weight": 3.82 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.config.max_tokens", + "count": 2, + "mean_delta_ms": 0.901, + "std_delta_ms": 0.043, + "weight": 1.91 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.config.temperature", + "count": 2, + "mean_delta_ms": 0.913, + "std_delta_ms": 0.044, + "weight": 1.91 + }, + { + "source": "server.layer_2.ffn_down", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.925, + "std_delta_ms": 0.046, + "weight": 1.91 + }, + { + "source": "server.layer_3", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.144, + "std_delta_ms": 0.594, + "weight": 3.95 + }, + { + "source": "server.layer_3", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.145, + "std_delta_ms": 0.594, + "weight": 1.98 + }, + { + "source": "server.layer_3", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.157, + "std_delta_ms": 0.594, + "weight": 1.98 + }, + { + "source": "server.layer_3", + "target": "server.buffer.input_ids", + "count": 2, + "mean_delta_ms": 0.75, + "std_delta_ms": 0.041, + "weight": 1.9 + }, + { + "source": "server.layer_3.q", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.137, + "std_delta_ms": 0.595, + "weight": 3.94 + }, + { + "source": "server.layer_3.q", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.138, + "std_delta_ms": 0.595, + "weight": 1.97 + }, + { + "source": "server.layer_3.q", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.15, + "std_delta_ms": 0.594, + "weight": 1.98 + }, + { + "source": "server.layer_3.q", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.161, + "std_delta_ms": 0.594, + "weight": 1.99 + }, + { + "source": "server.layer_3.k", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.132, + "std_delta_ms": 0.595, + "weight": 3.93 + }, + { + "source": "server.layer_3.k", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.133, + "std_delta_ms": 0.595, + "weight": 1.97 + }, + { + "source": "server.layer_3.k", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.145, + "std_delta_ms": 0.594, + "weight": 1.97 + }, + { + "source": "server.layer_3.k", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.156, + "std_delta_ms": 0.594, + "weight": 1.98 + }, + { + "source": "server.layer_3.v", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.126, + "std_delta_ms": 0.595, + "weight": 3.92 + }, + { + "source": "server.layer_3.v", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.127, + "std_delta_ms": 0.595, + "weight": 1.96 + }, + { + "source": "server.layer_3.v", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.139, + "std_delta_ms": 0.595, + "weight": 1.97 + }, + { + "source": "server.layer_3.v", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.15, + "std_delta_ms": 0.594, + "weight": 1.98 + }, + { + "source": "server.kv_cache_3", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.119, + "std_delta_ms": 0.596, + "weight": 3.91 + }, + { + "source": "server.kv_cache_3", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.12, + "std_delta_ms": 0.596, + "weight": 1.96 + }, + { + "source": "server.kv_cache_3", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.132, + "std_delta_ms": 0.596, + "weight": 1.97 + }, + { + "source": "server.kv_cache_3", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.143, + "std_delta_ms": 0.595, + "weight": 1.97 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.113, + "std_delta_ms": 0.597, + "weight": 3.91 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.114, + "std_delta_ms": 0.597, + "weight": 1.95 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.126, + "std_delta_ms": 0.596, + "weight": 1.96 + }, + { + "source": "server.kv_cache_3.keys", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.137, + "std_delta_ms": 0.595, + "weight": 1.97 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.108, + "std_delta_ms": 0.597, + "weight": 3.9 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.109, + "std_delta_ms": 0.597, + "weight": 1.95 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.121, + "std_delta_ms": 0.596, + "weight": 1.96 + }, + { + "source": "server.kv_cache_3.values", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.132, + "std_delta_ms": 0.596, + "weight": 1.97 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.103, + "std_delta_ms": 0.597, + "weight": 3.89 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.103, + "std_delta_ms": 0.597, + "weight": 1.95 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.115, + "std_delta_ms": 0.597, + "weight": 1.95 + }, + { + "source": "server.layer_3.ffn_up", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.127, + "std_delta_ms": 0.596, + "weight": 1.96 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.config", + "count": 6, + "mean_delta_ms": 1.097, + "std_delta_ms": 0.598, + "weight": 3.88 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.config.max_tokens", + "count": 3, + "mean_delta_ms": 1.098, + "std_delta_ms": 0.598, + "weight": 1.94 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.config.temperature", + "count": 3, + "mean_delta_ms": 1.11, + "std_delta_ms": 0.597, + "weight": 1.95 + }, + { + "source": "server.layer_3.ffn_down", + "target": "server.buffer.input_ids", + "count": 3, + "mean_delta_ms": 1.121, + "std_delta_ms": 0.597, + "weight": 1.96 + } + ], + "clusters": [ + { + "id": 0, + "members": [ + "server.buffer", + "server.buffer.input_ids", + "server.buffer.logits", + "server.config", + "server.config.max_tokens", + "server.config.temperature", + "server.kv_cache_0", + "server.kv_cache_0.keys", + "server.kv_cache_0.values", + "server.kv_cache_1", + "server.kv_cache_1.keys", + "server.kv_cache_1.values", + "server.kv_cache_2", + "server.kv_cache_2.keys", + "server.kv_cache_2.values", + "server.kv_cache_3", + "server.kv_cache_3.keys", + "server.kv_cache_3.values", + "server.kv_cache_4", + "server.kv_cache_4.keys", + "server.kv_cache_4.values", + "server.kv_cache_5", + "server.kv_cache_5.keys", + "server.kv_cache_5.values", + "server.layer_0", + "server.layer_0.ffn_down", + "server.layer_0.ffn_up", + "server.layer_0.k", + "server.layer_0.q", + "server.layer_0.v", + "server.layer_1", + "server.layer_1.ffn_down", + "server.layer_1.ffn_up", + "server.layer_1.k", + "server.layer_1.q", + "server.layer_1.v", + "server.layer_2", + "server.layer_2.ffn_down", + "server.layer_2.ffn_up", + "server.layer_2.k", + "server.layer_2.q", + "server.layer_2.v", + "server.layer_3", + "server.layer_3.ffn_down", + "server.layer_3.ffn_up", + "server.layer_3.k", + "server.layer_3.q", + "server.layer_3.v", + "server.layer_4", + "server.layer_4.ffn_down", + "server.layer_4.ffn_up", + "server.layer_4.k", + "server.layer_4.q", + "server.layer_4.v", + "server.layer_5", + "server.layer_5.ffn_down", + "server.layer_5.ffn_up", + "server.layer_5.k", + "server.layer_5.q", + "server.layer_5.v" + ], + "size": 60, + "total_coaccesses": 257166 + } + ], + "chains": [ + [ + [ + "server.config.max_tokens", + 0.0 + ], + [ + "server.layer_1.ffn_down", + 1.0518131666666668 + ], + [ + "server.layer_2.ffn_down", + 0.9121313157894737 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.config.temperature", + 0.0 + ], + [ + "server.layer_1.ffn_down", + 1.0373188333333334 + ], + [ + "server.layer_2.ffn_down", + 0.9121313157894737 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.buffer.input_ids", + 0.0 + ], + [ + "server.layer_1.ffn_down", + 1.0217141666666667 + ], + [ + "server.layer_2.ffn_down", + 0.9121313157894737 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.config", + 0.0 + ], + [ + "server.layer_1.ffn_down", + 1.0561253333333334 + ], + [ + "server.layer_2.ffn_down", + 0.9121313157894737 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.layer_0", + 0.0 + ], + [ + "server.layer_1.v", + 0.9049087719298247 + ], + [ + "server.layer_2.ffn_down", + 0.9398842631578948 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.layer_0.v", + 0.0 + ], + [ + "server.kv_cache_1.values", + 0.9026495263157894 + ], + [ + "server.layer_2.ffn_down", + 0.9223287192982457 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.kv_cache_0", + 0.0 + ], + [ + "server.layer_1.ffn_down", + 0.9055946140350878 + ], + [ + "server.layer_2.ffn_down", + 0.9121313157894737 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.layer_0.q", + 0.0 + ], + [ + "server.kv_cache_1.keys", + 0.9112812807017544 + ], + [ + "server.layer_2.ffn_down", + 0.9273247719298247 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.kv_cache_0.keys", + 0.0 + ], + [ + "server.layer_1.ffn_down", + 0.8990886666666666 + ], + [ + "server.layer_2.ffn_down", + 0.9121313157894737 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.layer_0.k", + 0.0 + ], + [ + "server.kv_cache_1.keys", + 0.9062800701754387 + ], + [ + "server.layer_2.ffn_down", + 0.9273247719298247 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.kv_cache_0.values", + 0.0 + ], + [ + "server.layer_1.ffn_down", + 0.8941633859649123 + ], + [ + "server.layer_2.ffn_down", + 0.9121313157894737 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.layer_0.ffn_up", + 0.0 + ], + [ + "server.layer_2", + 1.0326862592592592 + ], + [ + "server.layer_3.ffn_down", + 0.9412946315789474 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer", + 0.87392035 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_up", + 0.900607754385965 + ], + [ + "server.layer_4.ffn_down", + 0.895272649122807 + ], + [ + "server.buffer.logits", + 1.0170518867924527 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.layer_0.ffn_down", + 0.0 + ], + [ + "server.layer_2", + 1.0275460555555556 + ], + [ + "server.layer_3.ffn_down", + 0.9412946315789474 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer", + 0.87392035 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_up", + 0.900607754385965 + ], + [ + "server.layer_4.ffn_down", + 0.895272649122807 + ], + [ + "server.buffer.logits", + 1.0170518867924527 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.layer_2.q", + 0.0 + ], + [ + "server.layer_3.ffn_down", + 0.9339259122807018 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer", + 0.87392035 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_up", + 0.900607754385965 + ], + [ + "server.layer_4.ffn_down", + 0.895272649122807 + ], + [ + "server.buffer.logits", + 1.0170518867924527 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ] + ], + [ + [ + "server.layer_1", + 0.0 + ], + [ + "server.kv_cache_2", + 0.9266117894736842 + ], + [ + "server.layer_3.ffn_down", + 0.9165338070175438 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer", + 0.87392035 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_up", + 0.900607754385965 + ], + [ + "server.layer_4.ffn_down", + 0.895272649122807 + ], + [ + "server.buffer.logits", + 1.0170518867924527 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ] + ], + [ + [ + "server.kv_cache_1", + 0.0 + ], + [ + "server.layer_2.ffn_down", + 0.9328606140350878 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ] + ], + [ + [ + "server.layer_2.k", + 0.0 + ], + [ + "server.layer_3.ffn_down", + 0.9285100701754386 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer", + 0.87392035 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_up", + 0.900607754385965 + ], + [ + "server.layer_4.ffn_down", + 0.895272649122807 + ], + [ + "server.buffer.logits", + 1.0170518867924527 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ] + ], + [ + [ + "server.layer_2.v", + 0.0 + ], + [ + "server.layer_3.ffn_down", + 0.9228537719298247 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer", + 0.87392035 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_up", + 0.900607754385965 + ], + [ + "server.layer_4.ffn_down", + 0.895272649122807 + ], + [ + "server.buffer.logits", + 1.0170518867924527 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ] + ], + [ + [ + "server.layer_1.ffn_up", + 0.0 + ], + [ + "server.layer_2.ffn_down", + 0.9174417368421053 + ], + [ + "server.layer_4", + 1.013014425925926 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ] + ], + [ + [ + "server.kv_cache_2.keys", + 0.0 + ], + [ + "server.layer_3.ffn_down", + 0.910787052631579 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer", + 0.87392035 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_up", + 0.900607754385965 + ], + [ + "server.layer_4.ffn_down", + 0.895272649122807 + ], + [ + "server.buffer.logits", + 1.0170518867924527 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ] + ], + [ + [ + "server.layer_3", + 0.0 + ], + [ + "server.layer_4", + 0.8929586666666667 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_3.ffn_up", + 0.8956261754385966 + ] + ], + [ + [ + "server.layer_3.q", + 0.0 + ], + [ + "server.layer_4", + 0.8850117368421053 + ], + [ + "server.buffer", + 1.0278262727272727 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_3.ffn_up", + 0.8956261754385966 + ] + ], + [ + [ + "server.layer_3.k", + 0.0 + ], + [ + "server.layer_4.q", + 0.8864671052631579 + ], + [ + "server.buffer", + 1.0207579454545455 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ] + ], + [ + [ + "server.layer_3.v", + 0.0 + ], + [ + "server.kv_cache_4", + 0.8972676140350877 + ], + [ + "server.buffer", + 1.020730232142857 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ] + ], + [ + [ + "server.kv_cache_3", + 0.0 + ], + [ + "server.kv_cache_4.keys", + 0.8964911403508772 + ], + [ + "server.buffer", + 1.0149988571428572 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ] + ], + [ + [ + "server.kv_cache_3.keys", + 0.0 + ], + [ + "server.kv_cache_4.values", + 0.8964836666666667 + ], + [ + "server.buffer", + 1.0271041578947369 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ] + ], + [ + [ + "server.kv_cache_3.values", + 0.0 + ], + [ + "server.layer_4.ffn_down", + 0.9015835614035088 + ], + [ + "server.buffer", + 1.016766947368421 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ] + ], + [ + [ + "server.layer_4.k", + 0.0 + ], + [ + "server.buffer", + 1.0327080178571428 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.layer_5", + 0.0 + ], + [ + "server.buffer", + 0.8834063333333334 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.layer_4.v", + 0.0 + ], + [ + "server.buffer", + 1.0276728035714284 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.layer_4.ffn_up", + 0.0 + ], + [ + "server.buffer", + 1.0218846491228069 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.layer_5.v", + 0.0 + ], + [ + "server.buffer", + 0.8635245833333334 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.kv_cache_5", + 0.0 + ], + [ + "server.buffer", + 0.8571830166666667 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.kv_cache_5.keys", + 0.0 + ], + [ + "server.buffer", + 0.85156655 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.kv_cache_5.values", + 0.0 + ], + [ + "server.buffer", + 0.8463985833333334 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.layer_5.ffn_up", + 0.0 + ], + [ + "server.buffer", + 0.8414601500000001 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ], + [ + [ + "server.layer_5.ffn_down", + 0.0 + ], + [ + "server.buffer", + 0.83639195 + ], + [ + "server.layer_1.q", + 0.9227793606557377 + ], + [ + "server.kv_cache_2.values", + 0.9300350877192982 + ], + [ + "server.layer_3.ffn_down", + 0.9057154736842106 + ], + [ + "server.layer_5.q", + 1.0162622264150942 + ], + [ + "server.buffer.logits", + 0.8781556785714285 + ], + [ + "server.layer_1.k", + 0.9080565454545454 + ], + [ + "server.layer_2.ffn_up", + 0.9294538596491229 + ], + [ + "server.layer_4", + 1.0292142407407407 + ], + [ + "server.layer_5.k", + 0.8970205535714285 + ] + ] + ], + "summary": { + "total_nodes": 60, + "total_edges": 3535, + "strong_edges": 3426, + "clusters": 1, + "chains": 37 + } +} \ No newline at end of file