JavaZero/traces / 01_matmul_add /4096_bf16_warm_eager.json
JavaZero's picture
download
raw
23.4 kB
{
"schemaVersion": 1,
"deviceProperties": [
{
"id": 0, "name": "NVIDIA GeForce RTX 5080", "totalGlobalMem": 17094475776,
"computeMajor": 12, "computeMinor": 0,
"maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 1536,
"regsPerBlock": 65536, "warpSize": 32,
"sharedMemPerBlock": 49152, "numSms": 84
, "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 101376, "sharedMemPerMultiprocessor": 102400
}
],
"trace_id": "77DE7F871BDB46388D0B86FD2168C8D0",
"cuda_driver_version": 13000,
"cuda_runtime_version": 13020,
"cupti_version": 130201,
"displayTimeUnit": "ms",
"baseTimeNanoseconds": 1775078550000000000,
"traceEvents": [
{
"ph": "X", "cat": "user_annotation", "name": "ProfilerStep#2", "pid": 93685, "tid": 93685,
"ts": 6098760135613.009, "dur": 434.153,
"args": {
"External id": 1,"Record function id": 0, "Ev Idx": 0
}
},
{
"ph": "X", "cat": "user_annotation", "name": "matmul_add", "pid": 93685, "tid": 93685,
"ts": 6098760135633.868, "dur": 405.890,
"args": {
"External id": 2,"Record function id": 0, "Ev Idx": 1
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 93685, "tid": 93685,
"ts": 6098760135940.438, "dur": 60.305,
"args": {
"External id": 3,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 2
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 93685, "tid": 93685,
"ts": 6098760135942.753, "dur": 55.175,
"args": {
"External id": 4,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 3
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 93685, "tid": 93685,
"ts": 6098760136007.526, "dur": 14.698,
"args": {
"External id": 5,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 4
}
},
{
"ph": "X", "cat": "user_annotation", "name": "ProfilerStep#3", "pid": 93685, "tid": 93685,
"ts": 6098760136063.122, "dur": 42.020,
"args": {
"External id": 6,"Record function id": 0, "Ev Idx": 5
}
},
{
"ph": "X", "cat": "user_annotation", "name": "matmul_add", "pid": 93685, "tid": 93685,
"ts": 6098760136073.382, "dur": 28.594,
"args": {
"External id": 7,"Record function id": 0, "Ev Idx": 6
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 93685, "tid": 93685,
"ts": 6098760136076.738, "dur": 13.486,
"args": {
"External id": 8,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 7
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 93685, "tid": 93685,
"ts": 6098760136077.089, "dur": 12.764,
"args": {
"External id": 9,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 8
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 93685, "tid": 93685,
"ts": 6098760136091.226, "dur": 6.312,
"args": {
"External id": 10,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 9
}
},
{
"ph": "X", "cat": "user_annotation", "name": "ProfilerStep#4", "pid": 93685, "tid": 93685,
"ts": 6098760136113.268, "dur": 25.088,
"args": {
"External id": 11,"Record function id": 0, "Ev Idx": 10
}
},
{
"ph": "X", "cat": "user_annotation", "name": "matmul_add", "pid": 93685, "tid": 93685,
"ts": 6098760136117.055, "dur": 18.666,
"args": {
"External id": 12,"Record function id": 0, "Ev Idx": 11
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 93685, "tid": 93685,
"ts": 6098760136119.079, "dur": 8.416,
"args": {
"External id": 13,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 12
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 93685, "tid": 93685,
"ts": 6098760136119.350, "dur": 7.945,
"args": {
"External id": 14,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 13
}
},
{
"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 93685, "tid": 93685,
"ts": 6098760136128.247, "dur": 4.468,
"args": {
"External id": 15,"Record function id": 0, "Sequence number": 0, "Fwd thread id": 0, "Ev Idx": 14
}
},
{
"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel2<cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8::Params)", "pid": 0, "tid": 7,
"ts": 6098760136257.891, "dur": 1241.753,
"args": {
"queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 17, "registers per thread": 154, "shared memory": 73728, "blocks per SM": 24.380953, "warps per SM": 97.523811, "grid": [512, 4, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0
}
},
{
"ph": "f", "id": 17, "pid": 0, "tid": 7, "ts": 6098760136257.891,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul>)", "pid": 0, "tid": 7,
"ts": 6098760137500.795, "dur": 69.276,
"args": {
"queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 27, "registers per thread": 40, "shared memory": 0, "blocks per SM": 195.047623, "warps per SM": 780.190491, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100
}
},
{
"ph": "f", "id": 27, "pid": 0, "tid": 7, "ts": 6098760137500.795,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 93685, "tid": 93685,
"ts": 6098760135981.196, "dur": 0.701,
"args": {
"External id": 4, "cbid": 200, "correlation": 44
}
},
{
"ph": "f", "id": 44, "pid": 93685, "tid": 93685, "ts": 6098760135981.196,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel2<cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8::Params)", "pid": 0, "tid": 7,
"ts": 6098760137571.063, "dur": 1239.099,
"args": {
"External id": 4, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 47, "registers per thread": 154, "shared memory": 73728, "blocks per SM": 24.380953, "warps per SM": 97.523811, "grid": [512, 4, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0
}
},
{
"ph": "f", "id": 47, "pid": 0, "tid": 7, "ts": 6098760137571.063,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 93685, "tid": 93685,
"ts": 6098760135984.262, "dur": 12.033,
"args": {
"External id": 4, "cbid": 307, "correlation": 47
}
},
{
"ph": "s", "id": 47, "pid": 93685, "tid": 93685, "ts": 6098760135984.262,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul>)", "pid": 0, "tid": 7,
"ts": 6098760138811.505, "dur": 69.723,
"args": {
"External id": 5, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 57, "registers per thread": 40, "shared memory": 0, "blocks per SM": 195.047623, "warps per SM": 780.190491, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100
}
},
{
"ph": "f", "id": 57, "pid": 0, "tid": 7, "ts": 6098760138811.505,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 93685, "tid": 93685,
"ts": 6098760136016.413, "dur": 4.338,
"args": {
"External id": 5, "cbid": 211, "correlation": 57
}
},
{
"ph": "s", "id": 57, "pid": 93685, "tid": 93685, "ts": 6098760136016.413,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 93685, "tid": 93685,
"ts": 6098760136085.405, "dur": 0.190,
"args": {
"External id": 9, "cbid": 200, "correlation": 74
}
},
{
"ph": "f", "id": 74, "pid": 93685, "tid": 93685, "ts": 6098760136085.405,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel2<cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8::Params)", "pid": 0, "tid": 7,
"ts": 6098760138882.124, "dur": 1239.834,
"args": {
"External id": 9, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 77, "registers per thread": 154, "shared memory": 73728, "blocks per SM": 24.380953, "warps per SM": 97.523811, "grid": [512, 4, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0
}
},
{
"ph": "f", "id": 77, "pid": 0, "tid": 7, "ts": 6098760138882.124,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 93685, "tid": 93685,
"ts": 6098760136086.387, "dur": 2.815,
"args": {
"External id": 9, "cbid": 307, "correlation": 77
}
},
{
"ph": "s", "id": 77, "pid": 93685, "tid": 93685, "ts": 6098760136086.387,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul>)", "pid": 0, "tid": 7,
"ts": 6098760140123.621, "dur": 69.628,
"args": {
"External id": 10, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 87, "registers per thread": 40, "shared memory": 0, "blocks per SM": 195.047623, "warps per SM": 780.190491, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100
}
},
{
"ph": "f", "id": 87, "pid": 0, "tid": 7, "ts": 6098760140123.621,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 93685, "tid": 93685,
"ts": 6098760136094.172, "dur": 2.625,
"args": {
"External id": 10, "cbid": 211, "correlation": 87
}
},
{
"ph": "s", "id": 87, "pid": 93685, "tid": 93685, "ts": 6098760136094.172,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 93685, "tid": 93685,
"ts": 6098760136124.319, "dur": 0.160,
"args": {
"External id": 14, "cbid": 200, "correlation": 104
}
},
{
"ph": "f", "id": 104, "pid": 93685, "tid": 93685, "ts": 6098760136124.319,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel2<cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_relu_bf16_128x64_64x3_nn_align8::Params)", "pid": 0, "tid": 7,
"ts": 6098760140195.264, "dur": 1239.035,
"args": {
"External id": 14, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 107, "registers per thread": 154, "shared memory": 73728, "blocks per SM": 24.380953, "warps per SM": 97.523811, "grid": [512, 4, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0
}
},
{
"ph": "f", "id": 107, "pid": 0, "tid": 7, "ts": 6098760140195.264,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 93685, "tid": 93685,
"ts": 6098760136124.990, "dur": 1.754,
"args": {
"External id": 14, "cbid": 307, "correlation": 107
}
},
{
"ph": "s", "id": 107, "pid": 93685, "tid": 93685, "ts": 6098760136124.990,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, std::array<char*, 3ul>)", "pid": 0, "tid": 7,
"ts": 6098760141436.697, "dur": 69.276,
"args": {
"External id": 15, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 117, "registers per thread": 40, "shared memory": 0, "blocks per SM": 195.047623, "warps per SM": 780.190491, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100
}
},
{
"ph": "f", "id": 117, "pid": 0, "tid": 7, "ts": 6098760141436.697,
"cat": "ac2g", "name": "ac2g", "bp": "e"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 93685, "tid": 93685,
"ts": 6098760136130.270, "dur": 1.894,
"args": {
"External id": 15, "cbid": 211, "correlation": 117
}
},
{
"ph": "s", "id": 117, "pid": 93685, "tid": 93685, "ts": 6098760136130.270,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceSynchronize", "pid": 93685, "tid": 93685,
"ts": 6098760136165.087, "dur": 5332.437,
"args": {
"cbid": 165, "correlation": 123
}
},
{
"ph": "s", "id": 123, "pid": 93685, "tid": 93685, "ts": 6098760136165.087,
"cat": "ac2g", "name": "ac2g"
},
{
"ph": "X", "cat": "gpu_user_annotation", "name": "matmul_add", "pid": 0, "tid": 7,
"ts": 6098760140195.263, "dur": 1310.711,
"args": {
"External id": 12
}
},
{
"ph": "X", "cat": "gpu_user_annotation", "name": "matmul_add", "pid": 0, "tid": 7,
"ts": 6098760138882.123, "dur": 1311.127,
"args": {
"External id": 7
}
},
{
"ph": "X", "cat": "gpu_user_annotation", "name": "matmul_add", "pid": 0, "tid": 7,
"ts": 6098760137571.062, "dur": 1310.167,
"args": {
"External id": 2
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 93685, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 93685, "tid": 0,
"args": {
"labels": "CPU"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 93685, "tid": 0,
"args": {
"sort_index": 93685
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 0, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 0, "tid": 0,
"args": {
"labels": "GPU 0"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 0, "tid": 0,
"args": {
"sort_index": 5000000
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 1, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 1, "tid": 0,
"args": {
"labels": "GPU 1"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 1, "tid": 0,
"args": {
"sort_index": 5000001
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 2, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 2, "tid": 0,
"args": {
"labels": "GPU 2"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 2, "tid": 0,
"args": {
"sort_index": 5000002
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 3, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 3, "tid": 0,
"args": {
"labels": "GPU 3"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 3, "tid": 0,
"args": {
"sort_index": 5000003
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 4, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 4, "tid": 0,
"args": {
"labels": "GPU 4"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 4, "tid": 0,
"args": {
"sort_index": 5000004
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 5, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 5, "tid": 0,
"args": {
"labels": "GPU 5"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 5, "tid": 0,
"args": {
"sort_index": 5000005
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 6, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 6, "tid": 0,
"args": {
"labels": "GPU 6"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 6, "tid": 0,
"args": {
"sort_index": 5000006
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 7, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 7, "tid": 0,
"args": {
"labels": "GPU 7"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 7, "tid": 0,
"args": {
"sort_index": 5000007
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 8, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 8, "tid": 0,
"args": {
"labels": "GPU 8"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 8, "tid": 0,
"args": {
"sort_index": 5000008
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 9, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 9, "tid": 0,
"args": {
"labels": "GPU 9"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 9, "tid": 0,
"args": {
"sort_index": 5000009
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 10, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 10, "tid": 0,
"args": {
"labels": "GPU 10"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 10, "tid": 0,
"args": {
"sort_index": 5000010
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 11, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 11, "tid": 0,
"args": {
"labels": "GPU 11"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 11, "tid": 0,
"args": {
"sort_index": 5000011
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 12, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 12, "tid": 0,
"args": {
"labels": "GPU 12"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 12, "tid": 0,
"args": {
"sort_index": 5000012
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 13, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 13, "tid": 0,
"args": {
"labels": "GPU 13"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 13, "tid": 0,
"args": {
"sort_index": 5000013
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 14, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 14, "tid": 0,
"args": {
"labels": "GPU 14"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 14, "tid": 0,
"args": {
"sort_index": 5000014
}
},
{
"name": "process_name", "ph": "M", "ts": 6098760135463.418, "pid": 15, "tid": 0,
"args": {
"name": "python"
}
},
{
"name": "process_labels", "ph": "M", "ts": 6098760135463.418, "pid": 15, "tid": 0,
"args": {
"labels": "GPU 15"
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 15, "tid": 0,
"args": {
"sort_index": 5000015
}
},
{
"name": "thread_name", "ph": "M", "ts": 6098760135463.418, "pid": 0, "tid": 7,
"args": {
"name": "stream 7 "
}
},
{
"name": "thread_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 0, "tid": 7,
"args": {
"sort_index": 7
}
},
{
"name": "thread_name", "ph": "M", "ts": 6098760135463.418, "pid": 93685, "tid": 93685,
"args": {
"name": "thread 93685 (python)"
}
},
{
"name": "thread_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 93685, "tid": 93685,
"args": {
"sort_index": 93685
}
},
{
"name": "thread_name", "ph": "M", "ts": 6098760135463.418, "pid": 93685, "tid": 93685,
"args": {
"name": "thread 93685 (python)"
}
},
{
"name": "thread_sort_index", "ph": "M", "ts": 6098760135463.418, "pid": 93685, "tid": 93685,
"args": {
"sort_index": 93685
}
},
{
"ph": "X", "cat": "Trace", "ts": 6098760135406.890, "dur": 6101.951,
"pid": "Spans", "tid": "PyTorch Profiler",
"name": "PyTorch Profiler (0)",
"args": {
"Op count": 0
}
},
{
"name": "process_sort_index", "ph": "M", "ts": 6098760135406.890,
"pid": "Spans", "tid": 0,
"args": {
"sort_index": 536870912
}
},
{
"name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g",
"pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 6098760135406.890
},
{
"name": "Record Window End", "ph": "i", "s": "g",
"pid": "", "tid": "", "ts": 6098760141891.837
}
],
"traceName": "./traces/01_matmul_add/4096_bf16_warm_eager.json"
}

Xet Storage Details

Size:
23.4 kB
·
Xet hash:
a88e9d990598f5d50325bae264fdd23277deec54347e156d3fb71c406f822960

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.