diff --git "a/profile_trace/iteration_11776/rank5_trace.json" "b/profile_trace/iteration_11776/rank5_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_11776/rank5_trace.json" @@ -0,0 +1,68774 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 5, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "C568BFD2F1654B5EB17FF639EDF14F7C", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096682479.961, "dur": 123.890, + "args": { + "External id": 231425,"Record function id": 0, "Sequence number": 959175, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096682497.676, "dur": 96.126, + "args": { + "External id": 231426,"Sequence number": 959175, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2070552, "tid": 2107648, "ts": 5327096682497.676, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2107648, + "ts": 5327096682506.548, "dur": 85.079, + "args": { + "External id": 231427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096682614.262, "dur": 210.116, + "args": { + "External id": 231428,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327096682706.924, "dur": 102.044, + "args": { + "External id": 231429,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.31", "pid": 2070552, "tid": 2107648, + "ts": 5327096682742.954, "dur": 53.661, + "args": { + "External id": 231430,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096682814.107, "dur": 2.060, + "args": { + "External id": 231431,"Sequence number": 959174, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2070552, "tid": 2107648, "ts": 5327096682814.107, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearListNetFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096682833.449, "dur": 77070.222, + "args": { + "External id": 231432,"Record function id": 0, "Sequence number": 959173, "Fwd thread id": 1, "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096682839.776, "dur": 77009.532, + "args": { + "External id": 231433,"Sequence number": 959173, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8 + } + }, + { + "ph": "f", "id": 3, "pid": 2070552, "tid": 2107648, "ts": 5327096682839.776, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096682879.446, "dur": 4.252, + "args": { + "External id": 231434,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096682887.004, "dur": 76830.329, + "args": { + "External id": 231435,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096682889.504, "dur": 76827.072, + "args": { + "External id": 231436,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096682893.388, "dur": 9.779, + "args": { + "External id": 231437,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096682907.549, "dur": 76807.331, + "args": { + "External id": 231438,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070552, "tid": 2107648, + "ts": 5327096759724.045, "dur": 0.512, + "args": { + "External id": 231439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070552, "tid": 2107648, + "ts": 5327096759726.914, "dur": 3.244, + "args": { + "External id": 231440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070552, "tid": 2107648, + "ts": 5327096759728.867, "dur": 1.122, + "args": { + "External id": 231441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2107648, + "ts": 5327096759737.865, "dur": 40.957, + "args": { + "External id": 231442,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2107648, + "ts": 5327096759790.242, "dur": 52.503, + "args": { + "External id": 231443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2107648, + "ts": 5327096759791.916, "dur": 50.630, + "args": { + "External id": 231444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2107648, + "ts": 5327096759794.113, "dur": 48.052, + "args": { + "External id": 231445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096759855.783, "dur": 37.946, + "args": { + "External id": 231446,"Record function id": 0, "Concrete Inputs": ["", "15", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], []], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096759857.094, "dur": 36.441, + "args": { + "External id": 231447,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096759864.109, "dur": 6.440, + "args": { + "External id": 231448,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096759871.784, "dur": 21.148, + "args": { + "External id": 231449,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096759917.433, "dur": 12.851, + "args": { + "External id": 231450,"Record function id": 0, "Ev Idx": 25 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096759920.567, "dur": 8.195, + "args": { + "External id": 231451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096759923.735, "dur": 4.494, + "args": { + "External id": 231452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096759924.553, "dur": 3.554, + "args": { + "External id": 231453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096759934.484, "dur": 19.557, + "args": { + "External id": 231454,"Record function id": 0, "Sequence number": 959172, "Fwd thread id": 1, "Ev Idx": 29 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096759935.713, "dur": 15.605, + "args": { + "External id": 231455,"Sequence number": 959172, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 30 + } + }, + { + "ph": "f", "id": 4, "pid": 2070552, "tid": 2107648, "ts": 5327096759935.713, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070552, "tid": 2107648, + "ts": 5327096759941.486, "dur": 9.555, + "args": { + "External id": 231456,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096759944.624, "dur": 6.228, + "args": { + "External id": 231457,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096759958.385, "dur": 208.981, + "args": { + "External id": 231458,"Record function id": 0, "Sequence number": 959171, "Fwd thread id": 1, "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096759966.547, "dur": 192.133, + "args": { + "External id": 231459,"Sequence number": 959171, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 34 + } + }, + { + "ph": "f", "id": 5, "pid": 2070552, "tid": 2107648, "ts": 5327096759966.547, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096760021.423, "dur": 3.799, + "args": { + "External id": 231460,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096760027.877, "dur": 51.074, + "args": { + "External id": 231461,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096760028.589, "dur": 50.105, + "args": { + "External id": 231462,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760029.882, "dur": 7.896, + "args": { + "External id": 231463,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096760038.727, "dur": 39.336, + "args": { + "External id": 231464,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 39 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070552, "tid": 2107648, + "ts": 5327096760080.614, "dur": 0.269, + "args": { + "External id": 231465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070552, "tid": 2107648, + "ts": 5327096760081.890, "dur": 3.757, + "args": { + "External id": 231466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070552, "tid": 2107648, + "ts": 5327096760084.938, "dur": 0.582, + "args": { + "External id": 231467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2107648, + "ts": 5327096760088.656, "dur": 20.392, + "args": { + "External id": 231468,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2107648, + "ts": 5327096760112.818, "dur": 38.707, + "args": { + "External id": 231469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2107648, + "ts": 5327096760113.749, "dur": 37.615, + "args": { + "External id": 231470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2107648, + "ts": 5327096760115.694, "dur": 35.380, + "args": { + "External id": 231471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096760174.465, "dur": 8.497, + "args": { + "External id": 231472,"Record function id": 0, "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096760177.531, "dur": 4.720, + "args": { + "External id": 231473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096760178.876, "dur": 2.162, + "args": { + "External id": 231474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 49 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096760179.462, "dur": 1.488, + "args": { + "External id": 231475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096760186.672, "dur": 47.915, + "args": { + "External id": 231476,"Record function id": 0, "Sequence number": 959170, "Fwd thread id": 1, "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5327096760187.745, "dur": 7.252, + "args": { + "External id": 231477,"Sequence number": 959170, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 52 + } + }, + { + "ph": "f", "id": 6, "pid": 2070552, "tid": 2107648, "ts": 5327096760187.745, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070552, "tid": 2107648, + "ts": 5327096760189.849, "dur": 4.963, + "args": { + "External id": 231478,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096760192.738, "dur": 1.947, + "args": { + "External id": 231479,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2107648, + "ts": 5327096760199.987, "dur": 26.197, + "args": { + "External id": 231480,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096760239.853, "dur": 345.840, + "args": { + "External id": 231481,"Record function id": 0, "Sequence number": 959169, "Fwd thread id": 1, "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096760241.037, "dur": 334.785, + "args": { + "External id": 231482,"Sequence number": 959169, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 57 + } + }, + { + "ph": "f", "id": 7, "pid": 2070552, "tid": 2107648, "ts": 5327096760241.037, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096760404.705, "dur": 47.821, + "args": { + "External id": 231483,"kernel_hash": "cuukjsp6rxz3jug6vt6aydazifg4agx2qo5hdktyvypms7xloy55", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uu/cuukjsp6rxz3jug6vt6aydazifg4agx2qo5hdktyvypms7xloy55.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096760489.062, "dur": 26.297, + "args": { + "External id": 231484,"kernel_hash": "ca6xizp2qkfxzkredwq3zuqbocaripz3jyqqq6oyyli4d7qxaxem", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/a6/ca6xizp2qkfxzkredwq3zuqbocaripz3jyqqq6oyyli4d7qxaxem.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 59 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096760533.214, "dur": 21.069, + "args": { + "External id": 231485,"kernel_hash": "c37t5saqik2yxxap5wjzy6t6ncvdgty24ktri3fnll7y5tjs566n", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/37/c37t5saqik2yxxap5wjzy6t6ncvdgty24ktri3fnll7y5tjs566n.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096760593.159, "dur": 7.803, + "args": { + "External id": 231486,"Record function id": 0, "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096760595.334, "dur": 4.978, + "args": { + "External id": 231487,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096760596.913, "dur": 2.610, + "args": { + "External id": 231488,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096760597.833, "dur": 1.580, + "args": { + "External id": 231489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096760605.157, "dur": 2840.727, + "args": { + "External id": 231490,"Record function id": 0, "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5327096760677.759, "dur": 1022.302, + "args": { + "External id": 231491,"Record function id": 0, "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.30", "pid": 2070552, "tid": 2107648, + "ts": 5327096760705.378, "dur": 984.517, + "args": { + "External id": 231492,"Record function id": 0, "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5327096760721.933, "dur": 948.085, + "args": { + "External id": 231493,"Record function id": 0, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096760826.410, "dur": 6.992, + "args": { + "External id": 231494,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 69 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096760850.087, "dur": 38.866, + "args": { + "External id": 231495,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760856.808, "dur": 3.570, + "args": { + "External id": 231496,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760861.867, "dur": 0.461, + "args": { + "External id": 231497,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760862.877, "dur": 2.691, + "args": { + "External id": 231498,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760870.071, "dur": 0.574, + "args": { + "External id": 231499,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 74 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760871.176, "dur": 0.389, + "args": { + "External id": 231500,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760872.184, "dur": 0.327, + "args": { + "External id": 231501,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760875.862, "dur": 0.376, + "args": { + "External id": 231502,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760877.059, "dur": 0.536, + "args": { + "External id": 231503,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096760879.459, "dur": 2.250, + "args": { + "External id": 231504,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096760902.413, "dur": 40.277, + "args": { + "External id": 231505,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327096760997.578, "dur": 124.107, + "args": { + "External id": 231506,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096761010.756, "dur": 6.871, + "args": { + "External id": 231507,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327096761024.674, "dur": 14.164, + "args": { + "External id": 231508,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327096761029.955, "dur": 8.492, + "args": { + "External id": 231509,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761034.254, "dur": 2.662, + "args": { + "External id": 231510,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096761045.726, "dur": 29.473, + "args": { + "External id": 231511,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761047.258, "dur": 0.622, + "args": { + "External id": 231512,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761050.633, "dur": 0.544, + "args": { + "External id": 231513,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761051.706, "dur": 2.407, + "args": { + "External id": 231514,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 89 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761054.717, "dur": 0.384, + "args": { + "External id": 231515,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761056.371, "dur": 0.563, + "args": { + "External id": 231516,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761058.966, "dur": 0.558, + "args": { + "External id": 231517,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761061.349, "dur": 0.537, + "args": { + "External id": 231518,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 93 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761062.619, "dur": 2.803, + "args": { + "External id": 231519,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096761067.651, "dur": 0.361, + "args": { + "External id": 231520,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096761087.451, "dur": 25.786, + "args": { + "External id": 231521,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096761185.057, "dur": 329.843, + "args": { + "External id": 231522,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096761217.018, "dur": 292.712, + "args": { + "External id": 231523,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 98, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096761227.592, "dur": 274.839, + "args": { + "External id": 231524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096761541.373, "dur": 2.547, + "args": { + "External id": 231525,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 100, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096761709.173, "dur": 1710.486, + "args": { + "External id": 231526,"Sequence number": 959168, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 101 + } + }, + { + "ph": "f", "id": 8, "pid": 2070552, "tid": 2107648, "ts": 5327096761709.173, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096761832.687, "dur": 125.255, + "args": { + "External id": 231527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096762017.197, "dur": 44.513, + "args": { + "External id": 231528,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762083.709, "dur": 57.120, + "args": { + "External id": 231529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762151.448, "dur": 35.733, + "args": { + "External id": 231530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762194.084, "dur": 47.328, + "args": { + "External id": 231531,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762248.749, "dur": 29.135, + "args": { + "External id": 231532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762288.593, "dur": 44.414, + "args": { + "External id": 231533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096762358.916, "dur": 24.187, + "args": { + "External id": 231534,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096762402.456, "dur": 31.677, + "args": { + "External id": 231535,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096762459.893, "dur": 19.139, + "args": { + "External id": 231536,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096762497.043, "dur": 14.599, + "args": { + "External id": 231537,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762522.102, "dur": 29.788, + "args": { + "External id": 231538,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762555.201, "dur": 34.474, + "args": { + "External id": 231539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327096762653.803, "dur": 197.167, + "args": { + "External id": 231540,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096762747.165, "dur": 6.308, + "args": { + "External id": 231541,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096762755.631, "dur": 2.234, + "args": { + "External id": 231542,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096762891.748, "dur": 26.407, + "args": { + "External id": 231543,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096762931.642, "dur": 18.090, + "args": { + "External id": 231544,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096762960.240, "dur": 61.840, + "args": { + "External id": 231545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096763030.811, "dur": 41.655, + "args": { + "External id": 231546,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096763080.045, "dur": 20.170, + "args": { + "External id": 231547,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096763104.747, "dur": 30.716, + "args": { + "External id": 231548,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096763143.321, "dur": 20.884, + "args": { + "External id": 231549,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096763171.325, "dur": 29.758, + "args": { + "External id": 231550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327096763222.839, "dur": 22.550, + "args": { + "External id": 231551,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096763265.476, "dur": 25.085, + "args": { + "External id": 231552,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096763307.657, "dur": 15.558, + "args": { + "External id": 231553,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096763338.443, "dur": 15.228, + "args": { + "External id": 231554,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327096763367.826, "dur": 17.120, + "args": { + "External id": 231555,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763468.001, "dur": 14.469, + "args": { + "External id": 231556,"Record function id": 0, "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763471.201, "dur": 10.302, + "args": { + "External id": 231557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763475.436, "dur": 5.085, + "args": { + "External id": 231558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763476.741, "dur": 3.651, + "args": { + "External id": 231559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763486.382, "dur": 4.899, + "args": { + "External id": 231560,"Record function id": 0, "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763487.835, "dur": 3.026, + "args": { + "External id": 231561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763488.759, "dur": 1.506, + "args": { + "External id": 231562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763489.453, "dur": 0.746, + "args": { + "External id": 231563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763494.823, "dur": 3.895, + "args": { + "External id": 231564,"Record function id": 0, "Ev Idx": 139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763495.843, "dur": 2.464, + "args": { + "External id": 231565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763496.461, "dur": 1.375, + "args": { + "External id": 231566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763497.100, "dur": 0.630, + "args": { + "External id": 231567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763501.936, "dur": 3.744, + "args": { + "External id": 231568,"Record function id": 0, "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763503.043, "dur": 2.249, + "args": { + "External id": 231569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763503.712, "dur": 1.125, + "args": { + "External id": 231570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763504.188, "dur": 0.559, + "args": { + "External id": 231571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763508.852, "dur": 5.520, + "args": { + "External id": 231572,"Record function id": 0, "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763511.450, "dur": 2.510, + "args": { + "External id": 231573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763512.137, "dur": 1.028, + "args": { + "External id": 231574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763512.425, "dur": 0.669, + "args": { + "External id": 231575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763517.464, "dur": 3.764, + "args": { + "External id": 231576,"Record function id": 0, "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763518.403, "dur": 2.411, + "args": { + "External id": 231577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763519.016, "dur": 1.260, + "args": { + "External id": 231578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763519.333, "dur": 0.868, + "args": { + "External id": 231579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763524.354, "dur": 7.581, + "args": { + "External id": 231580,"Record function id": 0, "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763525.222, "dur": 6.306, + "args": { + "External id": 231581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763526.026, "dur": 5.085, + "args": { + "External id": 231582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763528.326, "dur": 2.721, + "args": { + "External id": 231583,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763535.005, "dur": 3.732, + "args": { + "External id": 231584,"Record function id": 0, "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763535.936, "dur": 2.383, + "args": { + "External id": 231585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763536.515, "dur": 1.200, + "args": { + "External id": 231586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763536.805, "dur": 0.838, + "args": { + "External id": 231587,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763542.925, "dur": 3.509, + "args": { + "External id": 231588,"Record function id": 0, "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096763543.852, "dur": 2.182, + "args": { + "External id": 231589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763544.301, "dur": 1.277, + "args": { + "External id": 231590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096763544.750, "dur": 0.762, + "args": { + "External id": 231591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096763550.570, "dur": 37109.453, + "args": { + "External id": 231592,"Record function id": 0, "Sequence number": 959167, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096763552.078, "dur": 37062.928, + "args": { + "External id": 231593,"Sequence number": 959167, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 9, "pid": 2070552, "tid": 2107648, "ts": 5327096763552.078, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5327096763583.304, "dur": 80.274, + "args": { + "External id": 231594,"Record function id": 0, "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5327096763672.904, "dur": 93.126, + "args": { + "External id": 231595,"Record function id": 0, "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5327096763773.403, "dur": 36834.014, + "args": { + "External id": 231596,"Record function id": 0, "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096763834.204, "dur": 8.287, + "args": { + "External id": 231597,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096763852.920, "dur": 5.106, + "args": { + "External id": 231598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096763873.185, "dur": 35840.208, + "args": { + "External id": 231599,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096763888.931, "dur": 35815.493, + "args": { + "External id": 231600,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096763938.880, "dur": 4.316, + "args": { + "External id": 231601,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096763954.720, "dur": 35710.522, + "args": { + "External id": 231602,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096763958.986, "dur": 35705.574, + "args": { + "External id": 231603,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096763962.054, "dur": 5.157, + "args": { + "External id": 231604,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096763968.901, "dur": 35692.396, + "args": { + "External id": 231605,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096799814.084, "dur": 8.870, + "args": { + "External id": 231606,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096799816.870, "dur": 5.773, + "args": { + "External id": 231607,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096799854.390, "dur": 426.137, + "args": { + "External id": 231608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096799886.184, "dur": 389.141, + "args": { + "External id": 231609,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 184, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096799898.802, "dur": 370.529, + "args": { + "External id": 231610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096800305.440, "dur": 2.158, + "args": { + "External id": 231611,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 186, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800368.357, "dur": 8.707, + "args": { + "External id": 231612,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800426.953, "dur": 1.030, + "args": { + "External id": 231613,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800447.027, "dur": 1.305, + "args": { + "External id": 231614,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800463.851, "dur": 0.738, + "args": { + "External id": 231615,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800478.171, "dur": 2.956, + "args": { + "External id": 231616,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800493.777, "dur": 1.233, + "args": { + "External id": 231617,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800509.458, "dur": 1.093, + "args": { + "External id": 231618,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800525.191, "dur": 2.985, + "args": { + "External id": 231619,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800539.795, "dur": 2.915, + "args": { + "External id": 231620,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096800676.447, "dur": 2829.288, + "args": { + "External id": 231621,"Record function id": 0, "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5327096800699.276, "dur": 1119.602, + "args": { + "External id": 231622,"Record function id": 0, "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5327096800715.501, "dur": 368.227, + "args": { + "External id": 231623,"Record function id": 0, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800807.071, "dur": 4.444, + "args": { + "External id": 231624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800814.903, "dur": 0.846, + "args": { + "External id": 231625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800817.579, "dur": 1.026, + "args": { + "External id": 231626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800822.391, "dur": 1.083, + "args": { + "External id": 231627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800824.975, "dur": 1.124, + "args": { + "External id": 231628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800827.606, "dur": 0.836, + "args": { + "External id": 231629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800830.026, "dur": 4.232, + "args": { + "External id": 231630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800837.612, "dur": 0.853, + "args": { + "External id": 231631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800840.027, "dur": 1.004, + "args": { + "External id": 231632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096800842.674, "dur": 0.969, + "args": { + "External id": 231633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096800863.830, "dur": 186.790, + "args": { + "External id": 231634,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096800887.209, "dur": 158.804, + "args": { + "External id": 231635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096800907.114, "dur": 12.467, + "args": { + "External id": 231636,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096800924.461, "dur": 89.718, + "args": { + "External id": 231637,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096800927.047, "dur": 86.598, + "args": { + "External id": 231638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096800931.021, "dur": 7.278, + "args": { + "External id": 231639,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096800939.906, "dur": 72.723, + "args": { + "External id": 231640,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.29", "pid": 2070552, "tid": 2107648, + "ts": 5327096801186.333, "dur": 625.420, + "args": { + "External id": 231641,"Record function id": 0, "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5327096801204.704, "dur": 594.479, + "args": { + "External id": 231642,"Record function id": 0, "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096801266.875, "dur": 5.284, + "args": { + "External id": 231643,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096801287.641, "dur": 40.012, + "args": { + "External id": 231644,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801292.707, "dur": 1.716, + "args": { + "External id": 231645,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801297.022, "dur": 2.635, + "args": { + "External id": 231646,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801301.044, "dur": 0.396, + "args": { + "External id": 231647,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801304.919, "dur": 0.318, + "args": { + "External id": 231648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801306.847, "dur": 0.476, + "args": { + "External id": 231649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801308.974, "dur": 2.086, + "args": { + "External id": 231650,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801315.404, "dur": 0.386, + "args": { + "External id": 231651,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801317.176, "dur": 0.549, + "args": { + "External id": 231652,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801320.849, "dur": 0.444, + "args": { + "External id": 231653,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096801337.479, "dur": 35.830, + "args": { + "External id": 231654,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327096801406.443, "dur": 108.577, + "args": { + "External id": 231655,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096801416.189, "dur": 5.491, + "args": { + "External id": 231656,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327096801426.422, "dur": 9.320, + "args": { + "External id": 231657,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327096801430.163, "dur": 5.136, + "args": { + "External id": 231658,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801433.653, "dur": 0.443, + "args": { + "External id": 231659,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096801442.176, "dur": 31.264, + "args": { + "External id": 231660,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801444.111, "dur": 0.549, + "args": { + "External id": 231661,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801446.289, "dur": 0.418, + "args": { + "External id": 231662,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801449.818, "dur": 0.397, + "args": { + "External id": 231663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801451.570, "dur": 0.427, + "args": { + "External id": 231664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801453.366, "dur": 1.916, + "args": { + "External id": 231665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801456.670, "dur": 0.370, + "args": { + "External id": 231666,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801458.458, "dur": 2.704, + "args": { + "External id": 231667,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801464.840, "dur": 0.370, + "args": { + "External id": 231668,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096801466.803, "dur": 0.550, + "args": { + "External id": 231669,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096801485.721, "dur": 21.440, + "args": { + "External id": 231670,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096801561.377, "dur": 163.361, + "args": { + "External id": 231671,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096801591.281, "dur": 129.372, + "args": { + "External id": 231672,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 247, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096801600.000, "dur": 115.800, + "args": { + "External id": 231673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096801744.574, "dur": 2.056, + "args": { + "External id": 231674,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 249, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096801826.933, "dur": 1654.022, + "args": { + "External id": 231675,"Sequence number": 959166, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 250 + } + }, + { + "ph": "f", "id": 10, "pid": 2070552, "tid": 2107648, "ts": 5327096801826.933, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096801937.915, "dur": 128.678, + "args": { + "External id": 231676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096802112.392, "dur": 43.818, + "args": { + "External id": 231677,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327096802174.712, "dur": 48.948, + "args": { + "External id": 231678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096802233.602, "dur": 32.297, + "args": { + "External id": 231679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096802272.415, "dur": 45.624, + "args": { + "External id": 231680,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096802326.377, "dur": 27.445, + "args": { + "External id": 231681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096802360.801, "dur": 43.281, + "args": { + "External id": 231682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096802430.118, "dur": 25.424, + "args": { + "External id": 231683,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096802473.915, "dur": 30.120, + "args": { + "External id": 231684,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096802526.061, "dur": 20.767, + "args": { + "External id": 231685,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096802559.790, "dur": 13.513, + "args": { + "External id": 231686,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096802584.054, "dur": 27.402, + "args": { + "External id": 231687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096802614.133, "dur": 82.452, + "args": { + "External id": 231688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327096802731.707, "dur": 177.566, + "args": { + "External id": 231689,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096802810.458, "dur": 5.417, + "args": { + "External id": 231690,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096802818.334, "dur": 4.098, + "args": { + "External id": 231691,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096802948.357, "dur": 24.260, + "args": { + "External id": 231692,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096803002.877, "dur": 16.365, + "args": { + "External id": 231693,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096803029.074, "dur": 38.211, + "args": { + "External id": 231694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096803072.918, "dur": 49.745, + "args": { + "External id": 231695,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096803136.097, "dur": 25.375, + "args": { + "External id": 231696,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096803167.853, "dur": 29.758, + "args": { + "External id": 231697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096803202.849, "dur": 21.249, + "args": { + "External id": 231698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096803232.946, "dur": 29.053, + "args": { + "External id": 231699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327096803286.065, "dur": 23.476, + "args": { + "External id": 231700,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096803331.395, "dur": 24.165, + "args": { + "External id": 231701,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096803372.147, "dur": 18.033, + "args": { + "External id": 231702,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096803406.064, "dur": 14.346, + "args": { + "External id": 231703,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327096803436.637, "dur": 18.266, + "args": { + "External id": 231704,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803526.120, "dur": 14.064, + "args": { + "External id": 231705,"Record function id": 0, "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803529.348, "dur": 9.940, + "args": { + "External id": 231706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803533.439, "dur": 5.129, + "args": { + "External id": 231707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803534.773, "dur": 3.661, + "args": { + "External id": 231708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803546.745, "dur": 4.985, + "args": { + "External id": 231709,"Record function id": 0, "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803548.432, "dur": 2.872, + "args": { + "External id": 231710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803549.477, "dur": 1.327, + "args": { + "External id": 231711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803549.825, "dur": 0.903, + "args": { + "External id": 231712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803554.879, "dur": 4.182, + "args": { + "External id": 231713,"Record function id": 0, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803556.054, "dur": 2.550, + "args": { + "External id": 231714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803556.677, "dur": 1.529, + "args": { + "External id": 231715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803557.223, "dur": 0.872, + "args": { + "External id": 231716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803562.183, "dur": 6.188, + "args": { + "External id": 231717,"Record function id": 0, "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803563.421, "dur": 4.531, + "args": { + "External id": 231718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803564.047, "dur": 3.389, + "args": { + "External id": 231719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803564.368, "dur": 2.998, + "args": { + "External id": 231720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803571.419, "dur": 4.480, + "args": { + "External id": 231721,"Record function id": 0, "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803572.687, "dur": 2.794, + "args": { + "External id": 231722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803573.342, "dur": 1.461, + "args": { + "External id": 231723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803573.895, "dur": 0.817, + "args": { + "External id": 231724,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803579.105, "dur": 3.722, + "args": { + "External id": 231725,"Record function id": 0, "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803580.252, "dur": 2.172, + "args": { + "External id": 231726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803580.845, "dur": 1.161, + "args": { + "External id": 231727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803581.294, "dur": 0.634, + "args": { + "External id": 231728,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803585.945, "dur": 4.239, + "args": { + "External id": 231729,"Record function id": 0, "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803587.183, "dur": 2.552, + "args": { + "External id": 231730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803587.975, "dur": 1.264, + "args": { + "External id": 231731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803588.467, "dur": 0.696, + "args": { + "External id": 231732,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803593.395, "dur": 8.841, + "args": { + "External id": 231733,"Record function id": 0, "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803594.515, "dur": 4.257, + "args": { + "External id": 231734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803595.274, "dur": 3.078, + "args": { + "External id": 231735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803597.652, "dur": 0.626, + "args": { + "External id": 231736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803605.291, "dur": 3.369, + "args": { + "External id": 231737,"Record function id": 0, "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096803606.352, "dur": 1.885, + "args": { + "External id": 231738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803606.834, "dur": 0.997, + "args": { + "External id": 231739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096803607.113, "dur": 0.642, + "args": { + "External id": 231740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096803613.640, "dur": 36201.834, + "args": { + "External id": 231741,"Record function id": 0, "Sequence number": 959165, "Fwd thread id": 1, "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096803614.892, "dur": 36191.654, + "args": { + "External id": 231742,"Sequence number": 959165, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 317 + } + }, + { + "ph": "f", "id": 11, "pid": 2070552, "tid": 2107648, "ts": 5327096803614.892, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5327096803678.530, "dur": 38.604, + "args": { + "External id": 231743,"Record function id": 0, "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5327096803724.439, "dur": 74.149, + "args": { + "External id": 231744,"Record function id": 0, "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5327096803803.864, "dur": 35995.309, + "args": { + "External id": 231745,"Record function id": 0, "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096803891.678, "dur": 6.998, + "args": { + "External id": 231746,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096803909.222, "dur": 5.036, + "args": { + "External id": 231747,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096803930.937, "dur": 35026.812, + "args": { + "External id": 231748,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096803945.054, "dur": 35003.429, + "args": { + "External id": 231749,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096804011.412, "dur": 17.008, + "args": { + "External id": 231750,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096804035.807, "dur": 34873.926, + "args": { + "External id": 231751,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096804040.280, "dur": 34868.831, + "args": { + "External id": 231752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096804044.249, "dur": 5.645, + "args": { + "External id": 231753,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096804051.598, "dur": 34853.917, + "args": { + "External id": 231754,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096839056.744, "dur": 9.435, + "args": { + "External id": 231755,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096839060.046, "dur": 5.606, + "args": { + "External id": 231756,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096839094.236, "dur": 362.355, + "args": { + "External id": 231757,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096839125.315, "dur": 326.992, + "args": { + "External id": 231758,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 333, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096839138.576, "dur": 308.266, + "args": { + "External id": 231759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096839478.926, "dur": 2.273, + "args": { + "External id": 231760,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 335, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839536.854, "dur": 6.242, + "args": { + "External id": 231761,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839588.504, "dur": 1.444, + "args": { + "External id": 231762,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839606.990, "dur": 1.277, + "args": { + "External id": 231763,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839660.640, "dur": 1.440, + "args": { + "External id": 231764,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839677.420, "dur": 1.090, + "args": { + "External id": 231765,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839690.926, "dur": 0.600, + "args": { + "External id": 231766,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839703.152, "dur": 1.099, + "args": { + "External id": 231767,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839719.321, "dur": 2.832, + "args": { + "External id": 231768,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096839736.867, "dur": 1.143, + "args": { + "External id": 231769,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096839830.791, "dur": 2754.220, + "args": { + "External id": 231770,"Record function id": 0, "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5327096839851.138, "dur": 1049.590, + "args": { + "External id": 231771,"Record function id": 0, "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5327096839864.746, "dur": 335.792, + "args": { + "External id": 231772,"Record function id": 0, "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839944.771, "dur": 3.794, + "args": { + "External id": 231773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839951.880, "dur": 1.306, + "args": { + "External id": 231774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839955.166, "dur": 1.186, + "args": { + "External id": 231775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839958.297, "dur": 3.111, + "args": { + "External id": 231776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839964.776, "dur": 0.818, + "args": { + "External id": 231777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839967.292, "dur": 0.922, + "args": { + "External id": 231778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839969.885, "dur": 3.073, + "args": { + "External id": 231779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839974.618, "dur": 0.783, + "args": { + "External id": 231780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839992.693, "dur": 1.448, + "args": { + "External id": 231781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096839996.697, "dur": 0.843, + "args": { + "External id": 231782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096840016.666, "dur": 155.300, + "args": { + "External id": 231783,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096840032.426, "dur": 134.825, + "args": { + "External id": 231784,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096840049.465, "dur": 13.785, + "args": { + "External id": 231785,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096840067.602, "dur": 71.515, + "args": { + "External id": 231786,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096840069.919, "dur": 68.800, + "args": { + "External id": 231787,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840073.917, "dur": 9.466, + "args": { + "External id": 231788,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096840087.029, "dur": 51.208, + "args": { + "External id": 231789,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.28", "pid": 2070552, "tid": 2107648, + "ts": 5327096840289.024, "dur": 604.612, + "args": { + "External id": 231790,"Record function id": 0, "Ev Idx": 365 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5327096840305.369, "dur": 575.375, + "args": { + "External id": 231791,"Record function id": 0, "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096840364.628, "dur": 4.255, + "args": { + "External id": 231792,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096840384.049, "dur": 37.021, + "args": { + "External id": 231793,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840388.978, "dur": 1.390, + "args": { + "External id": 231794,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840394.042, "dur": 0.448, + "args": { + "External id": 231795,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840396.570, "dur": 0.444, + "args": { + "External id": 231796,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840398.518, "dur": 2.073, + "args": { + "External id": 231797,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840402.436, "dur": 0.422, + "args": { + "External id": 231798,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840404.441, "dur": 0.330, + "args": { + "External id": 231799,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840408.622, "dur": 2.237, + "args": { + "External id": 231800,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840412.817, "dur": 0.309, + "args": { + "External id": 231801,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840414.562, "dur": 0.238, + "args": { + "External id": 231802,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096840431.500, "dur": 32.643, + "args": { + "External id": 231803,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327096840494.067, "dur": 105.509, + "args": { + "External id": 231804,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096840502.827, "dur": 3.286, + "args": { + "External id": 231805,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327096840511.005, "dur": 11.656, + "args": { + "External id": 231806,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327096840514.923, "dur": 7.267, + "args": { + "External id": 231807,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840518.346, "dur": 2.284, + "args": { + "External id": 231808,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096840529.110, "dur": 31.202, + "args": { + "External id": 231809,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840531.196, "dur": 0.386, + "args": { + "External id": 231810,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840532.973, "dur": 0.539, + "args": { + "External id": 231811,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840534.973, "dur": 0.377, + "args": { + "External id": 231812,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840538.100, "dur": 1.981, + "args": { + "External id": 231813,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840541.232, "dur": 0.393, + "args": { + "External id": 231814,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840543.032, "dur": 2.200, + "args": { + "External id": 231815,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840546.754, "dur": 0.325, + "args": { + "External id": 231816,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840550.741, "dur": 0.347, + "args": { + "External id": 231817,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096840553.681, "dur": 0.485, + "args": { + "External id": 231818,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096840571.368, "dur": 21.260, + "args": { + "External id": 231819,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096840683.850, "dur": 119.990, + "args": { + "External id": 231820,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096840714.696, "dur": 85.808, + "args": { + "External id": 231821,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 396, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096840724.116, "dur": 72.118, + "args": { + "External id": 231822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096840821.445, "dur": 1.700, + "args": { + "External id": 231823,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 398, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096840907.470, "dur": 1651.180, + "args": { + "External id": 231824,"Sequence number": 959164, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 399 + } + }, + { + "ph": "f", "id": 12, "pid": 2070552, "tid": 2107648, "ts": 5327096840907.470, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841039.057, "dur": 113.295, + "args": { + "External id": 231825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096841195.163, "dur": 39.816, + "args": { + "External id": 231826,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841251.080, "dur": 49.280, + "args": { + "External id": 231827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841310.965, "dur": 31.912, + "args": { + "External id": 231828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841350.626, "dur": 45.281, + "args": { + "External id": 231829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841401.783, "dur": 27.684, + "args": { + "External id": 231830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841436.841, "dur": 42.834, + "args": { + "External id": 231831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096841503.534, "dur": 22.272, + "args": { + "External id": 231832,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096841545.640, "dur": 25.977, + "args": { + "External id": 231833,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096841592.786, "dur": 17.597, + "args": { + "External id": 231834,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096841665.482, "dur": 20.487, + "args": { + "External id": 231835,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841696.396, "dur": 37.960, + "args": { + "External id": 231836,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096841737.716, "dur": 33.011, + "args": { + "External id": 231837,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327096841803.827, "dur": 187.515, + "args": { + "External id": 231838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096841882.769, "dur": 6.860, + "args": { + "External id": 231839,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096841891.602, "dur": 2.580, + "args": { + "External id": 231840,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096842033.758, "dur": 27.807, + "args": { + "External id": 231841,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096842073.298, "dur": 15.075, + "args": { + "External id": 231842,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096842097.203, "dur": 39.637, + "args": { + "External id": 231843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096842142.853, "dur": 34.631, + "args": { + "External id": 231844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096842186.103, "dur": 37.700, + "args": { + "External id": 231845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096842233.214, "dur": 33.053, + "args": { + "External id": 231846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096842274.178, "dur": 20.059, + "args": { + "External id": 231847,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096842301.064, "dur": 30.272, + "args": { + "External id": 231848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327096842357.118, "dur": 20.059, + "args": { + "External id": 231849,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096842395.453, "dur": 30.891, + "args": { + "External id": 231850,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096842442.489, "dur": 17.929, + "args": { + "External id": 231851,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096842478.698, "dur": 18.139, + "args": { + "External id": 231852,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327096842511.051, "dur": 16.000, + "args": { + "External id": 231853,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842606.073, "dur": 49.427, + "args": { + "External id": 231854,"Record function id": 0, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842609.509, "dur": 44.251, + "args": { + "External id": 231855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842613.434, "dur": 38.812, + "args": { + "External id": 231856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842614.784, "dur": 36.956, + "args": { + "External id": 231857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842662.596, "dur": 6.683, + "args": { + "External id": 231858,"Record function id": 0, "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842664.676, "dur": 4.132, + "args": { + "External id": 231859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842665.897, "dur": 2.344, + "args": { + "External id": 231860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842666.650, "dur": 1.499, + "args": { + "External id": 231861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842672.540, "dur": 6.273, + "args": { + "External id": 231862,"Record function id": 0, "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842673.929, "dur": 4.431, + "args": { + "External id": 231863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842674.414, "dur": 3.370, + "args": { + "External id": 231864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842675.131, "dur": 2.573, + "args": { + "External id": 231865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842682.059, "dur": 4.567, + "args": { + "External id": 231866,"Record function id": 0, "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842683.368, "dur": 2.813, + "args": { + "External id": 231867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842684.188, "dur": 1.236, + "args": { + "External id": 231868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842684.539, "dur": 0.794, + "args": { + "External id": 231869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842689.585, "dur": 4.018, + "args": { + "External id": 231870,"Record function id": 0, "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842691.020, "dur": 2.190, + "args": { + "External id": 231871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842691.614, "dur": 1.055, + "args": { + "External id": 231872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842691.895, "dur": 0.712, + "args": { + "External id": 231873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842696.606, "dur": 3.734, + "args": { + "External id": 231874,"Record function id": 0, "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842697.760, "dur": 2.196, + "args": { + "External id": 231875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842698.306, "dur": 1.236, + "args": { + "External id": 231876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842698.778, "dur": 0.690, + "args": { + "External id": 231877,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842703.548, "dur": 3.373, + "args": { + "External id": 231878,"Record function id": 0, "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842704.703, "dur": 1.818, + "args": { + "External id": 231879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842705.144, "dur": 0.951, + "args": { + "External id": 231880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842705.416, "dur": 0.605, + "args": { + "External id": 231881,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842709.990, "dur": 3.491, + "args": { + "External id": 231882,"Record function id": 0, "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842711.121, "dur": 1.967, + "args": { + "External id": 231883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842711.546, "dur": 0.997, + "args": { + "External id": 231884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842711.857, "dur": 0.613, + "args": { + "External id": 231885,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842717.708, "dur": 8.787, + "args": { + "External id": 231886,"Record function id": 0, "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096842718.671, "dur": 7.404, + "args": { + "External id": 231887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842719.094, "dur": 6.453, + "args": { + "External id": 231888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096842721.940, "dur": 3.532, + "args": { + "External id": 231889,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096842730.393, "dur": 35672.794, + "args": { + "External id": 231890,"Record function id": 0, "Sequence number": 959163, "Fwd thread id": 1, "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096842731.749, "dur": 35663.424, + "args": { + "External id": 231891,"Sequence number": 959163, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 466 + } + }, + { + "ph": "f", "id": 13, "pid": 2070552, "tid": 2107648, "ts": 5327096842731.749, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5327096842760.737, "dur": 39.237, + "args": { + "External id": 231892,"Record function id": 0, "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5327096842807.236, "dur": 63.075, + "args": { + "External id": 231893,"Record function id": 0, "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5327096842876.363, "dur": 35511.096, + "args": { + "External id": 231894,"Record function id": 0, "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096842961.304, "dur": 6.011, + "args": { + "External id": 231895,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096842990.503, "dur": 6.451, + "args": { + "External id": 231896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096843017.930, "dur": 34524.290, + "args": { + "External id": 231897,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096843031.946, "dur": 34501.711, + "args": { + "External id": 231898,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096843067.625, "dur": 14.099, + "args": { + "External id": 231899,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096843088.071, "dur": 34407.736, + "args": { + "External id": 231900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096843090.385, "dur": 34404.633, + "args": { + "External id": 231901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096843096.430, "dur": 5.992, + "args": { + "External id": 231902,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096843103.983, "dur": 34387.416, + "args": { + "External id": 231903,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096877653.577, "dur": 9.784, + "args": { + "External id": 231904,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096877656.557, "dur": 6.224, + "args": { + "External id": 231905,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096877692.051, "dur": 388.769, + "args": { + "External id": 231906,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096877722.367, "dur": 353.108, + "args": { + "External id": 231907,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 482, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096877734.746, "dur": 334.629, + "args": { + "External id": 231908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096878105.282, "dur": 2.160, + "args": { + "External id": 231909,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 484, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878168.073, "dur": 6.251, + "args": { + "External id": 231910,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878219.990, "dur": 1.546, + "args": { + "External id": 231911,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878238.232, "dur": 3.214, + "args": { + "External id": 231912,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878254.786, "dur": 0.938, + "args": { + "External id": 231913,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878270.610, "dur": 1.040, + "args": { + "External id": 231914,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878283.372, "dur": 0.887, + "args": { + "External id": 231915,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878296.323, "dur": 2.871, + "args": { + "External id": 231916,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878311.195, "dur": 3.040, + "args": { + "External id": 231917,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878326.059, "dur": 0.961, + "args": { + "External id": 231918,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096878417.162, "dur": 2880.812, + "args": { + "External id": 231919,"Record function id": 0, "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5327096878439.847, "dur": 1087.844, + "args": { + "External id": 231920,"Record function id": 0, "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5327096878455.197, "dur": 382.846, + "args": { + "External id": 231921,"Record function id": 0, "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878534.281, "dur": 4.002, + "args": { + "External id": 231922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878541.440, "dur": 1.330, + "args": { + "External id": 231923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878544.610, "dur": 3.255, + "args": { + "External id": 231924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878549.797, "dur": 0.955, + "args": { + "External id": 231925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878552.161, "dur": 1.082, + "args": { + "External id": 231926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878556.397, "dur": 0.830, + "args": { + "External id": 231927,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878558.830, "dur": 3.369, + "args": { + "External id": 231928,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878563.926, "dur": 0.997, + "args": { + "External id": 231929,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878566.545, "dur": 1.020, + "args": { + "External id": 231930,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096878570.920, "dur": 1.018, + "args": { + "External id": 231931,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096878589.534, "dur": 212.285, + "args": { + "External id": 231932,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096878604.731, "dur": 191.654, + "args": { + "External id": 231933,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096878659.495, "dur": 14.553, + "args": { + "External id": 231934,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096878678.431, "dur": 88.672, + "args": { + "External id": 231935,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096878680.825, "dur": 85.925, + "args": { + "External id": 231936,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096878704.813, "dur": 8.004, + "args": { + "External id": 231937,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096878714.507, "dur": 51.761, + "args": { + "External id": 231938,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.27", "pid": 2070552, "tid": 2107648, + "ts": 5327096878929.746, "dur": 590.757, + "args": { + "External id": 231939,"Record function id": 0, "Ev Idx": 514 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5327096878949.154, "dur": 558.409, + "args": { + "External id": 231940,"Record function id": 0, "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096879028.660, "dur": 6.275, + "args": { + "External id": 231941,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096879050.956, "dur": 38.524, + "args": { + "External id": 231942,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879055.615, "dur": 1.568, + "args": { + "External id": 231943,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879059.728, "dur": 2.253, + "args": { + "External id": 231944,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879063.539, "dur": 0.364, + "args": { + "External id": 231945,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879065.489, "dur": 0.612, + "args": { + "External id": 231946,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879070.207, "dur": 0.362, + "args": { + "External id": 231947,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879072.535, "dur": 3.131, + "args": { + "External id": 231948,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879077.401, "dur": 0.321, + "args": { + "External id": 231949,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879081.275, "dur": 0.342, + "args": { + "External id": 231950,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879083.221, "dur": 0.398, + "args": { + "External id": 231951,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096879099.461, "dur": 34.545, + "args": { + "External id": 231952,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327096879165.919, "dur": 112.650, + "args": { + "External id": 231953,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096879177.716, "dur": 3.209, + "args": { + "External id": 231954,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327096879185.755, "dur": 11.746, + "args": { + "External id": 231955,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327096879191.569, "dur": 5.483, + "args": { + "External id": 231956,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879195.096, "dur": 0.620, + "args": { + "External id": 231957,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096879204.727, "dur": 33.052, + "args": { + "External id": 231958,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879206.570, "dur": 0.559, + "args": { + "External id": 231959,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879210.948, "dur": 0.512, + "args": { + "External id": 231960,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879212.860, "dur": 2.458, + "args": { + "External id": 231961,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879217.203, "dur": 2.004, + "args": { + "External id": 231962,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879220.787, "dur": 0.470, + "args": { + "External id": 231963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879223.101, "dur": 0.334, + "args": { + "External id": 231964,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879226.906, "dur": 0.419, + "args": { + "External id": 231965,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879228.927, "dur": 0.537, + "args": { + "External id": 231966,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096879230.795, "dur": 0.603, + "args": { + "External id": 231967,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096879250.125, "dur": 20.915, + "args": { + "External id": 231968,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096879321.350, "dur": 119.655, + "args": { + "External id": 231969,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096879349.861, "dur": 87.899, + "args": { + "External id": 231970,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 545, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096879358.286, "dur": 75.310, + "args": { + "External id": 231971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096879458.272, "dur": 1.886, + "args": { + "External id": 231972,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 547, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096879534.631, "dur": 1736.610, + "args": { + "External id": 231973,"Sequence number": 959162, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 548 + } + }, + { + "ph": "f", "id": 14, "pid": 2070552, "tid": 2107648, "ts": 5327096879534.631, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096879685.420, "dur": 108.215, + "args": { + "External id": 231974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096879835.354, "dur": 38.294, + "args": { + "External id": 231975,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327096879902.037, "dur": 50.731, + "args": { + "External id": 231976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096879967.051, "dur": 50.033, + "args": { + "External id": 231977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880026.459, "dur": 47.701, + "args": { + "External id": 231978,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880080.382, "dur": 28.646, + "args": { + "External id": 231979,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880115.371, "dur": 41.281, + "args": { + "External id": 231980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096880189.927, "dur": 24.114, + "args": { + "External id": 231981,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096880235.730, "dur": 28.704, + "args": { + "External id": 231982,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096880287.314, "dur": 19.723, + "args": { + "External id": 231983,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096880320.875, "dur": 16.506, + "args": { + "External id": 231984,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880344.636, "dur": 28.952, + "args": { + "External id": 231985,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880376.731, "dur": 33.318, + "args": { + "External id": 231986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327096880441.817, "dur": 215.069, + "args": { + "External id": 231987,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096880524.639, "dur": 5.603, + "args": { + "External id": 231988,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096880532.202, "dur": 2.038, + "args": { + "External id": 231989,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096880700.069, "dur": 28.264, + "args": { + "External id": 231990,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096880744.756, "dur": 18.277, + "args": { + "External id": 231991,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880772.329, "dur": 41.202, + "args": { + "External id": 231992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880819.072, "dur": 37.375, + "args": { + "External id": 231993,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880877.409, "dur": 32.122, + "args": { + "External id": 231994,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880915.001, "dur": 36.349, + "args": { + "External id": 231995,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096880958.248, "dur": 38.125, + "args": { + "External id": 231996,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096881010.183, "dur": 33.035, + "args": { + "External id": 231997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327096881073.019, "dur": 22.853, + "args": { + "External id": 231998,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096881119.804, "dur": 24.642, + "args": { + "External id": 231999,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096881162.885, "dur": 17.507, + "args": { + "External id": 232000,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096881195.801, "dur": 14.867, + "args": { + "External id": 232001,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327096881225.544, "dur": 17.279, + "args": { + "External id": 232002,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881319.528, "dur": 15.037, + "args": { + "External id": 232003,"Record function id": 0, "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881322.948, "dur": 10.726, + "args": { + "External id": 232004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881327.170, "dur": 5.597, + "args": { + "External id": 232005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881328.770, "dur": 3.904, + "args": { + "External id": 232006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881338.236, "dur": 7.314, + "args": { + "External id": 232007,"Record function id": 0, "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881339.929, "dur": 5.159, + "args": { + "External id": 232008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881341.039, "dur": 3.397, + "args": { + "External id": 232009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881341.544, "dur": 2.812, + "args": { + "External id": 232010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881348.883, "dur": 5.288, + "args": { + "External id": 232011,"Record function id": 0, "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881350.613, "dur": 3.132, + "args": { + "External id": 232012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881351.636, "dur": 1.685, + "args": { + "External id": 232013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881352.413, "dur": 0.818, + "args": { + "External id": 232014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881357.320, "dur": 4.454, + "args": { + "External id": 232015,"Record function id": 0, "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881358.825, "dur": 2.534, + "args": { + "External id": 232016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881359.886, "dur": 1.020, + "args": { + "External id": 232017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881360.184, "dur": 0.629, + "args": { + "External id": 232018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881364.808, "dur": 4.297, + "args": { + "External id": 232019,"Record function id": 0, "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881366.444, "dur": 2.263, + "args": { + "External id": 232020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881366.991, "dur": 1.277, + "args": { + "External id": 232021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881367.545, "dur": 0.647, + "args": { + "External id": 232022,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881372.141, "dur": 4.251, + "args": { + "External id": 232023,"Record function id": 0, "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881373.545, "dur": 2.447, + "args": { + "External id": 232024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881374.330, "dur": 1.229, + "args": { + "External id": 232025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881374.696, "dur": 0.791, + "args": { + "External id": 232026,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881379.543, "dur": 4.121, + "args": { + "External id": 232027,"Record function id": 0, "Ev Idx": 602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881380.820, "dur": 2.450, + "args": { + "External id": 232028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881381.663, "dur": 0.960, + "args": { + "External id": 232029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881381.930, "dur": 0.616, + "args": { + "External id": 232030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881386.659, "dur": 6.006, + "args": { + "External id": 232031,"Record function id": 0, "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881387.878, "dur": 4.382, + "args": { + "External id": 232032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881388.712, "dur": 3.003, + "args": { + "External id": 232033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881390.950, "dur": 0.659, + "args": { + "External id": 232034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881395.638, "dur": 6.114, + "args": { + "External id": 232035,"Record function id": 0, "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096881397.186, "dur": 4.164, + "args": { + "External id": 232036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881398.197, "dur": 2.752, + "args": { + "External id": 232037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096881398.472, "dur": 2.405, + "args": { + "External id": 232038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096881406.700, "dur": 38458.572, + "args": { + "External id": 232039,"Record function id": 0, "Sequence number": 959161, "Fwd thread id": 1, "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096881407.856, "dur": 38448.739, + "args": { + "External id": 232040,"Sequence number": 959161, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 615 + } + }, + { + "ph": "f", "id": 15, "pid": 2070552, "tid": 2107648, "ts": 5327096881407.856, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5327096881438.609, "dur": 40.103, + "args": { + "External id": 232041,"Record function id": 0, "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5327096881486.978, "dur": 68.940, + "args": { + "External id": 232042,"Record function id": 0, "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5327096881561.729, "dur": 38286.725, + "args": { + "External id": 232043,"Record function id": 0, "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096881687.767, "dur": 8.434, + "args": { + "External id": 232044,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096881713.352, "dur": 4.939, + "args": { + "External id": 232045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096881736.730, "dur": 37260.534, + "args": { + "External id": 232046,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096881750.872, "dur": 37236.841, + "args": { + "External id": 232047,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096881789.513, "dur": 13.976, + "args": { + "External id": 232048,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096881809.860, "dur": 37125.725, + "args": { + "External id": 232049,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096881813.592, "dur": 37121.103, + "args": { + "External id": 232050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096881817.353, "dur": 5.836, + "args": { + "External id": 232051,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096881824.992, "dur": 37106.395, + "args": { + "External id": 232052,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096919089.318, "dur": 9.264, + "args": { + "External id": 232053,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096919092.460, "dur": 5.733, + "args": { + "External id": 232054,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096919127.292, "dur": 358.911, + "args": { + "External id": 232055,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096919159.157, "dur": 322.161, + "args": { + "External id": 232056,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 631, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096919171.352, "dur": 303.909, + "args": { + "External id": 232057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096919510.832, "dur": 2.001, + "args": { + "External id": 232058,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 633, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919574.463, "dur": 6.293, + "args": { + "External id": 232059,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919671.246, "dur": 4.164, + "args": { + "External id": 232060,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919695.368, "dur": 1.275, + "args": { + "External id": 232061,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919709.283, "dur": 1.179, + "args": { + "External id": 232062,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919725.743, "dur": 0.967, + "args": { + "External id": 232063,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919738.200, "dur": 3.166, + "args": { + "External id": 232064,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919753.979, "dur": 0.979, + "args": { + "External id": 232065,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919767.582, "dur": 2.844, + "args": { + "External id": 232066,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096919782.330, "dur": 0.988, + "args": { + "External id": 232067,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096919881.294, "dur": 2807.133, + "args": { + "External id": 232068,"Record function id": 0, "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5327096919903.328, "dur": 1067.620, + "args": { + "External id": 232069,"Record function id": 0, "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5327096919918.455, "dur": 343.689, + "args": { + "External id": 232070,"Record function id": 0, "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920016.981, "dur": 6.550, + "args": { + "External id": 232071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920027.089, "dur": 1.054, + "args": { + "External id": 232072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920030.024, "dur": 1.021, + "args": { + "External id": 232073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920032.937, "dur": 0.791, + "args": { + "External id": 232074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920035.328, "dur": 1.053, + "args": { + "External id": 232075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920039.481, "dur": 0.736, + "args": { + "External id": 232076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920041.909, "dur": 2.408, + "args": { + "External id": 232077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920045.978, "dur": 0.830, + "args": { + "External id": 232078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920048.471, "dur": 2.734, + "args": { + "External id": 232079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096920054.846, "dur": 1.198, + "args": { + "External id": 232080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096920074.888, "dur": 157.007, + "args": { + "External id": 232081,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096920091.174, "dur": 135.975, + "args": { + "External id": 232082,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096920111.409, "dur": 13.357, + "args": { + "External id": 232083,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096920129.320, "dur": 70.239, + "args": { + "External id": 232084,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096920131.658, "dur": 67.618, + "args": { + "External id": 232085,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920136.586, "dur": 7.177, + "args": { + "External id": 232086,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096920145.321, "dur": 53.472, + "args": { + "External id": 232087,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.26", "pid": 2070552, "tid": 2107648, + "ts": 5327096920349.772, "dur": 614.039, + "args": { + "External id": 232088,"Record function id": 0, "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5327096920369.371, "dur": 582.476, + "args": { + "External id": 232089,"Record function id": 0, "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096920428.988, "dur": 5.012, + "args": { + "External id": 232090,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096920449.058, "dur": 33.133, + "args": { + "External id": 232091,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920453.595, "dur": 1.638, + "args": { + "External id": 232092,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920457.304, "dur": 1.064, + "args": { + "External id": 232093,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920460.272, "dur": 0.474, + "args": { + "External id": 232094,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920462.529, "dur": 2.469, + "args": { + "External id": 232095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920467.746, "dur": 0.323, + "args": { + "External id": 232096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920469.580, "dur": 0.326, + "args": { + "External id": 232097,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920471.407, "dur": 0.588, + "args": { + "External id": 232098,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920473.921, "dur": 0.323, + "args": { + "External id": 232099,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920476.181, "dur": 0.361, + "args": { + "External id": 232100,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096920492.297, "dur": 32.102, + "args": { + "External id": 232101,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327096920554.771, "dur": 163.692, + "args": { + "External id": 232102,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096920563.780, "dur": 3.206, + "args": { + "External id": 232103,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327096920571.564, "dur": 11.901, + "args": { + "External id": 232104,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327096920577.678, "dur": 5.362, + "args": { + "External id": 232105,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920581.009, "dur": 0.765, + "args": { + "External id": 232106,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096920590.175, "dur": 80.018, + "args": { + "External id": 232107,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920592.224, "dur": 2.313, + "args": { + "External id": 232108,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920597.477, "dur": 0.487, + "args": { + "External id": 232109,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920599.605, "dur": 0.403, + "args": { + "External id": 232110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920601.754, "dur": 1.395, + "args": { + "External id": 232111,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920604.815, "dur": 0.416, + "args": { + "External id": 232112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920606.674, "dur": 0.341, + "args": { + "External id": 232113,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920610.373, "dur": 0.376, + "args": { + "External id": 232114,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920612.768, "dur": 0.387, + "args": { + "External id": 232115,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096920614.737, "dur": 2.198, + "args": { + "External id": 232116,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096920684.579, "dur": 25.657, + "args": { + "External id": 232117,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096920763.202, "dur": 119.528, + "args": { + "External id": 232118,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096920792.280, "dur": 87.259, + "args": { + "External id": 232119,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 694, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096920801.532, "dur": 72.858, + "args": { + "External id": 232120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096920900.451, "dur": 1.908, + "args": { + "External id": 232121,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 696, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096920992.912, "dur": 1668.327, + "args": { + "External id": 232122,"Sequence number": 959160, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 697 + } + }, + { + "ph": "f", "id": 16, "pid": 2070552, "tid": 2107648, "ts": 5327096920992.912, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921104.075, "dur": 105.584, + "args": { + "External id": 232123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096921251.301, "dur": 39.915, + "args": { + "External id": 232124,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921309.074, "dur": 49.592, + "args": { + "External id": 232125,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921368.365, "dur": 33.568, + "args": { + "External id": 232126,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921407.761, "dur": 45.879, + "args": { + "External id": 232127,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921459.663, "dur": 30.794, + "args": { + "External id": 232128,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921500.163, "dur": 43.168, + "args": { + "External id": 232129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096921567.904, "dur": 23.944, + "args": { + "External id": 232130,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096921611.006, "dur": 68.955, + "args": { + "External id": 232131,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096921706.818, "dur": 21.561, + "args": { + "External id": 232132,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096921744.293, "dur": 18.166, + "args": { + "External id": 232133,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921781.712, "dur": 33.958, + "args": { + "External id": 232134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096921818.991, "dur": 36.508, + "args": { + "External id": 232135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327096921882.505, "dur": 197.414, + "args": { + "External id": 232136,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096921967.787, "dur": 6.587, + "args": { + "External id": 232137,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096921991.148, "dur": 4.075, + "args": { + "External id": 232138,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096922117.387, "dur": 25.511, + "args": { + "External id": 232139,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096922155.627, "dur": 13.386, + "args": { + "External id": 232140,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096922177.202, "dur": 41.244, + "args": { + "External id": 232141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096922226.161, "dur": 38.905, + "args": { + "External id": 232142,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096922271.827, "dur": 37.508, + "args": { + "External id": 232143,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096922319.368, "dur": 35.799, + "args": { + "External id": 232144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096922361.934, "dur": 18.696, + "args": { + "External id": 232145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096922389.250, "dur": 29.124, + "args": { + "External id": 232146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327096922442.257, "dur": 20.852, + "args": { + "External id": 232147,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096922482.951, "dur": 22.898, + "args": { + "External id": 232148,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096922528.454, "dur": 15.830, + "args": { + "External id": 232149,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096922558.514, "dur": 14.333, + "args": { + "External id": 232150,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327096922586.365, "dur": 15.122, + "args": { + "External id": 232151,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922709.500, "dur": 16.666, + "args": { + "External id": 232152,"Record function id": 0, "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922713.320, "dur": 11.878, + "args": { + "External id": 232153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922717.457, "dur": 6.779, + "args": { + "External id": 232154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922718.732, "dur": 5.408, + "args": { + "External id": 232155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922730.099, "dur": 8.071, + "args": { + "External id": 232156,"Record function id": 0, "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922734.743, "dur": 3.019, + "args": { + "External id": 232157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922735.449, "dur": 1.847, + "args": { + "External id": 232158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922736.128, "dur": 1.094, + "args": { + "External id": 232159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922741.983, "dur": 4.211, + "args": { + "External id": 232160,"Record function id": 0, "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922743.265, "dur": 2.544, + "args": { + "External id": 232161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922743.992, "dur": 1.382, + "args": { + "External id": 232162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922744.648, "dur": 0.632, + "args": { + "External id": 232163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922749.359, "dur": 4.632, + "args": { + "External id": 232164,"Record function id": 0, "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922750.882, "dur": 2.720, + "args": { + "External id": 232165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922751.587, "dur": 1.345, + "args": { + "External id": 232166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922752.286, "dur": 0.569, + "args": { + "External id": 232167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922757.015, "dur": 6.176, + "args": { + "External id": 232168,"Record function id": 0, "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922758.356, "dur": 4.449, + "args": { + "External id": 232169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922759.020, "dur": 3.360, + "args": { + "External id": 232170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922761.724, "dur": 0.547, + "args": { + "External id": 232171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922766.235, "dur": 3.713, + "args": { + "External id": 232172,"Record function id": 0, "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922767.387, "dur": 2.180, + "args": { + "External id": 232173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922767.821, "dur": 1.317, + "args": { + "External id": 232174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922768.313, "dur": 0.735, + "args": { + "External id": 232175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922773.047, "dur": 3.955, + "args": { + "External id": 232176,"Record function id": 0, "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922774.299, "dur": 2.306, + "args": { + "External id": 232177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922774.736, "dur": 1.235, + "args": { + "External id": 232178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922775.200, "dur": 0.698, + "args": { + "External id": 232179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922780.027, "dur": 5.935, + "args": { + "External id": 232180,"Record function id": 0, "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922781.185, "dur": 4.366, + "args": { + "External id": 232181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922781.617, "dur": 3.484, + "args": { + "External id": 232182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922781.994, "dur": 3.033, + "args": { + "External id": 232183,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922791.895, "dur": 5.896, + "args": { + "External id": 232184,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096922793.216, "dur": 4.162, + "args": { + "External id": 232185,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922793.726, "dur": 3.213, + "args": { + "External id": 232186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096922796.083, "dur": 0.757, + "args": { + "External id": 232187,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096922802.726, "dur": 35653.585, + "args": { + "External id": 232188,"Record function id": 0, "Sequence number": 959159, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096922803.958, "dur": 35643.485, + "args": { + "External id": 232189,"Sequence number": 959159, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 17, "pid": 2070552, "tid": 2107648, "ts": 5327096922803.958, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5327096922832.035, "dur": 38.216, + "args": { + "External id": 232190,"Record function id": 0, "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5327096922878.120, "dur": 61.774, + "args": { + "External id": 232191,"Record function id": 0, "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5327096922945.947, "dur": 35494.379, + "args": { + "External id": 232192,"Record function id": 0, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096923050.532, "dur": 6.754, + "args": { + "External id": 232193,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096923066.698, "dur": 5.164, + "args": { + "External id": 232194,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096923090.229, "dur": 34508.276, + "args": { + "External id": 232195,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096923107.295, "dur": 34482.378, + "args": { + "External id": 232196,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096923147.164, "dur": 14.047, + "args": { + "External id": 232197,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096923167.177, "dur": 34384.146, + "args": { + "External id": 232198,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096923169.690, "dur": 34380.925, + "args": { + "External id": 232199,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096923174.585, "dur": 4.654, + "args": { + "External id": 232200,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096923180.890, "dur": 34366.114, + "args": { + "External id": 232201,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096957721.855, "dur": 9.789, + "args": { + "External id": 232202,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096957724.924, "dur": 6.243, + "args": { + "External id": 232203,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096957759.271, "dur": 382.753, + "args": { + "External id": 232204,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096957791.446, "dur": 346.024, + "args": { + "External id": 232205,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 780, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096957804.063, "dur": 327.795, + "args": { + "External id": 232206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096958165.398, "dur": 2.158, + "args": { + "External id": 232207,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 782, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958226.407, "dur": 6.183, + "args": { + "External id": 232208,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958279.854, "dur": 1.441, + "args": { + "External id": 232209,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958298.451, "dur": 1.583, + "args": { + "External id": 232210,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958313.432, "dur": 1.085, + "args": { + "External id": 232211,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958326.871, "dur": 1.340, + "args": { + "External id": 232212,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958341.424, "dur": 0.987, + "args": { + "External id": 232213,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958353.569, "dur": 1.108, + "args": { + "External id": 232214,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958365.585, "dur": 2.898, + "args": { + "External id": 232215,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958378.462, "dur": 1.151, + "args": { + "External id": 232216,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096958470.044, "dur": 2792.543, + "args": { + "External id": 232217,"Record function id": 0, "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5327096958488.744, "dur": 1053.622, + "args": { + "External id": 232218,"Record function id": 0, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5327096958501.662, "dur": 365.138, + "args": { + "External id": 232219,"Record function id": 0, "Ev Idx": 794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958580.807, "dur": 4.252, + "args": { + "External id": 232220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958587.996, "dur": 0.908, + "args": { + "External id": 232221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958592.402, "dur": 1.032, + "args": { + "External id": 232222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958595.269, "dur": 1.287, + "args": { + "External id": 232223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958597.927, "dur": 1.083, + "args": { + "External id": 232224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958600.146, "dur": 0.933, + "args": { + "External id": 232225,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958604.710, "dur": 3.123, + "args": { + "External id": 232226,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958609.401, "dur": 2.759, + "args": { + "External id": 232227,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958613.520, "dur": 0.979, + "args": { + "External id": 232228,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096958615.880, "dur": 0.976, + "args": { + "External id": 232229,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096958684.195, "dur": 150.789, + "args": { + "External id": 232230,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096958701.007, "dur": 129.346, + "args": { + "External id": 232231,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096958720.093, "dur": 13.168, + "args": { + "External id": 232232,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096958737.239, "dur": 65.316, + "args": { + "External id": 232233,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096958739.947, "dur": 62.244, + "args": { + "External id": 232234,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096958745.029, "dur": 4.897, + "args": { + "External id": 232235,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096958751.559, "dur": 50.211, + "args": { + "External id": 232236,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2070552, "tid": 2107648, + "ts": 5327096958955.533, "dur": 579.615, + "args": { + "External id": 232237,"Record function id": 0, "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5327096958972.867, "dur": 550.001, + "args": { + "External id": 232238,"Record function id": 0, "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096959053.582, "dur": 6.121, + "args": { + "External id": 232239,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096959075.320, "dur": 32.600, + "args": { + "External id": 232240,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959080.102, "dur": 1.531, + "args": { + "External id": 232241,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959083.169, "dur": 2.087, + "args": { + "External id": 232242,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959086.292, "dur": 2.477, + "args": { + "External id": 232243,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959089.620, "dur": 0.566, + "args": { + "External id": 232244,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959092.662, "dur": 0.564, + "args": { + "External id": 232245,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959094.686, "dur": 0.541, + "args": { + "External id": 232246,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959096.731, "dur": 0.591, + "args": { + "External id": 232247,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959099.930, "dur": 0.497, + "args": { + "External id": 232248,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959101.277, "dur": 0.251, + "args": { + "External id": 232249,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096959120.841, "dur": 35.381, + "args": { + "External id": 232250,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327096959189.930, "dur": 107.158, + "args": { + "External id": 232251,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096959200.235, "dur": 4.209, + "args": { + "External id": 232252,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327096959209.526, "dur": 11.054, + "args": { + "External id": 232253,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327096959213.513, "dur": 6.646, + "args": { + "External id": 232254,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959216.598, "dur": 2.004, + "args": { + "External id": 232255,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096959227.165, "dur": 27.927, + "args": { + "External id": 232256,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959228.885, "dur": 0.345, + "args": { + "External id": 232257,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959230.405, "dur": 2.216, + "args": { + "External id": 232258,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959233.385, "dur": 0.459, + "args": { + "External id": 232259,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959234.704, "dur": 0.245, + "args": { + "External id": 232260,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959238.054, "dur": 0.470, + "args": { + "External id": 232261,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959239.741, "dur": 0.372, + "args": { + "External id": 232262,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959240.980, "dur": 0.473, + "args": { + "External id": 232263,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959244.456, "dur": 2.632, + "args": { + "External id": 232264,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096959248.636, "dur": 0.367, + "args": { + "External id": 232265,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096959269.435, "dur": 19.888, + "args": { + "External id": 232266,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096959340.834, "dur": 113.445, + "args": { + "External id": 232267,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096959368.243, "dur": 82.971, + "args": { + "External id": 232268,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 843, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096959376.876, "dur": 70.623, + "args": { + "External id": 232269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096959471.769, "dur": 2.012, + "args": { + "External id": 232270,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 845, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096959549.230, "dur": 1687.070, + "args": { + "External id": 232271,"Sequence number": 959158, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 846 + } + }, + { + "ph": "f", "id": 18, "pid": 2070552, "tid": 2107648, "ts": 5327096959549.230, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096959698.527, "dur": 105.175, + "args": { + "External id": 232272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096959844.814, "dur": 37.682, + "args": { + "External id": 232273,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327096959900.560, "dur": 46.921, + "args": { + "External id": 232274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096959957.333, "dur": 47.703, + "args": { + "External id": 232275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960014.917, "dur": 47.053, + "args": { + "External id": 232276,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960068.497, "dur": 27.988, + "args": { + "External id": 232277,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960106.387, "dur": 44.783, + "args": { + "External id": 232278,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096960175.876, "dur": 27.293, + "args": { + "External id": 232279,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327096960223.032, "dur": 29.748, + "args": { + "External id": 232280,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096960273.757, "dur": 17.692, + "args": { + "External id": 232281,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096960304.434, "dur": 13.874, + "args": { + "External id": 232282,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960328.232, "dur": 27.534, + "args": { + "External id": 232283,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960358.527, "dur": 31.610, + "args": { + "External id": 232284,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327096960418.636, "dur": 165.233, + "args": { + "External id": 232285,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096960490.146, "dur": 5.738, + "args": { + "External id": 232286,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096960497.809, "dur": 3.766, + "args": { + "External id": 232287,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096960618.002, "dur": 77.637, + "args": { + "External id": 232288,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327096960714.846, "dur": 15.082, + "args": { + "External id": 232289,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960741.955, "dur": 46.141, + "args": { + "External id": 232290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960793.456, "dur": 38.210, + "args": { + "External id": 232291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960839.546, "dur": 21.818, + "args": { + "External id": 232292,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960865.431, "dur": 29.765, + "args": { + "External id": 232293,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960903.151, "dur": 21.652, + "args": { + "External id": 232294,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327096960931.802, "dur": 67.429, + "args": { + "External id": 232295,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327096961035.246, "dur": 26.129, + "args": { + "External id": 232296,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096961080.308, "dur": 27.348, + "args": { + "External id": 232297,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327096961120.901, "dur": 21.381, + "args": { + "External id": 232298,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327096961159.556, "dur": 15.207, + "args": { + "External id": 232299,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327096961187.751, "dur": 19.359, + "args": { + "External id": 232300,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961283.249, "dur": 16.592, + "args": { + "External id": 232301,"Record function id": 0, "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961286.647, "dur": 12.276, + "args": { + "External id": 232302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961290.551, "dur": 7.416, + "args": { + "External id": 232303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961291.963, "dur": 5.864, + "args": { + "External id": 232304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961303.734, "dur": 4.966, + "args": { + "External id": 232305,"Record function id": 0, "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961305.023, "dur": 3.229, + "args": { + "External id": 232306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961306.203, "dur": 1.500, + "args": { + "External id": 232307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961306.551, "dur": 1.063, + "args": { + "External id": 232308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961311.974, "dur": 4.080, + "args": { + "External id": 232309,"Record function id": 0, "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961313.172, "dur": 2.488, + "args": { + "External id": 232310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961313.679, "dur": 1.588, + "args": { + "External id": 232311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961314.226, "dur": 0.941, + "args": { + "External id": 232312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961319.230, "dur": 4.756, + "args": { + "External id": 232313,"Record function id": 0, "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961320.805, "dur": 2.742, + "args": { + "External id": 232314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961321.992, "dur": 1.001, + "args": { + "External id": 232315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961322.309, "dur": 0.605, + "args": { + "External id": 232316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961327.043, "dur": 3.947, + "args": { + "External id": 232317,"Record function id": 0, "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961328.105, "dur": 2.480, + "args": { + "External id": 232318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961328.672, "dur": 1.114, + "args": { + "External id": 232319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961328.970, "dur": 0.749, + "args": { + "External id": 232320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961334.051, "dur": 5.081, + "args": { + "External id": 232321,"Record function id": 0, "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961335.031, "dur": 3.704, + "args": { + "External id": 232322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961335.480, "dur": 2.715, + "args": { + "External id": 232323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961337.510, "dur": 0.574, + "args": { + "External id": 232324,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961342.359, "dur": 3.517, + "args": { + "External id": 232325,"Record function id": 0, "Ev Idx": 900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961343.357, "dur": 2.116, + "args": { + "External id": 232326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961343.863, "dur": 0.982, + "args": { + "External id": 232327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961344.161, "dur": 0.594, + "args": { + "External id": 232328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961348.883, "dur": 5.376, + "args": { + "External id": 232329,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961349.807, "dur": 4.052, + "args": { + "External id": 232330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961350.305, "dur": 3.133, + "args": { + "External id": 232331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961350.829, "dur": 2.541, + "args": { + "External id": 232332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961357.216, "dur": 4.090, + "args": { + "External id": 232333,"Record function id": 0, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327096961358.278, "dur": 2.625, + "args": { + "External id": 232334,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961358.756, "dur": 1.609, + "args": { + "External id": 232335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327096961359.502, "dur": 0.794, + "args": { + "External id": 232336,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096961364.991, "dur": 37580.395, + "args": { + "External id": 232337,"Record function id": 0, "Sequence number": 959157, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096961366.529, "dur": 37570.083, + "args": { + "External id": 232338,"Sequence number": 959157, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 19, "pid": 2070552, "tid": 2107648, "ts": 5327096961366.529, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5327096961395.075, "dur": 36.431, + "args": { + "External id": 232339,"Record function id": 0, "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5327096961438.911, "dur": 65.032, + "args": { + "External id": 232340,"Record function id": 0, "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5327096961509.766, "dur": 37419.394, + "args": { + "External id": 232341,"Record function id": 0, "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096961593.108, "dur": 5.844, + "args": { + "External id": 232342,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096961608.320, "dur": 4.138, + "args": { + "External id": 232343,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096961664.585, "dur": 36459.763, + "args": { + "External id": 232344,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327096961678.298, "dur": 36437.456, + "args": { + "External id": 232345,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096961710.503, "dur": 14.859, + "args": { + "External id": 232346,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096961731.782, "dur": 36343.114, + "args": { + "External id": 232347,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096961735.455, "dur": 36338.865, + "args": { + "External id": 232348,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096961741.025, "dur": 5.336, + "args": { + "External id": 232349,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096961748.048, "dur": 36323.107, + "args": { + "External id": 232350,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096998210.955, "dur": 8.708, + "args": { + "External id": 232351,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096998213.930, "dur": 5.455, + "args": { + "External id": 232352,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096998246.527, "dur": 354.100, + "args": { + "External id": 232353,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096998272.786, "dur": 323.716, + "args": { + "External id": 232354,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 929, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096998284.258, "dur": 307.367, + "args": { + "External id": 232355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096998650.265, "dur": 3.306, + "args": { + "External id": 232356,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 931, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998716.503, "dur": 6.623, + "args": { + "External id": 232357,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998769.275, "dur": 1.764, + "args": { + "External id": 232358,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998786.778, "dur": 1.848, + "args": { + "External id": 232359,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998801.080, "dur": 1.317, + "args": { + "External id": 232360,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998814.662, "dur": 0.780, + "args": { + "External id": 232361,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998828.537, "dur": 1.057, + "args": { + "External id": 232362,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998840.929, "dur": 1.199, + "args": { + "External id": 232363,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998853.338, "dur": 2.774, + "args": { + "External id": 232364,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096998866.092, "dur": 0.897, + "args": { + "External id": 232365,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327096998960.674, "dur": 2772.427, + "args": { + "External id": 232366,"Record function id": 0, "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5327096998995.219, "dur": 1047.830, + "args": { + "External id": 232367,"Record function id": 0, "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5327096999011.770, "dur": 319.630, + "args": { + "External id": 232368,"Record function id": 0, "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999093.805, "dur": 4.511, + "args": { + "External id": 232369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999102.081, "dur": 1.134, + "args": { + "External id": 232370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999105.167, "dur": 1.035, + "args": { + "External id": 232371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999109.231, "dur": 1.145, + "args": { + "External id": 232372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999111.923, "dur": 1.174, + "args": { + "External id": 232373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999114.644, "dur": 1.117, + "args": { + "External id": 232374,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999117.356, "dur": 3.085, + "args": { + "External id": 232375,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999123.525, "dur": 2.896, + "args": { + "External id": 232376,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999128.190, "dur": 0.857, + "args": { + "External id": 232377,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327096999130.609, "dur": 0.820, + "args": { + "External id": 232378,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096999148.845, "dur": 151.812, + "args": { + "External id": 232379,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096999163.727, "dur": 132.493, + "args": { + "External id": 232380,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096999180.751, "dur": 14.630, + "args": { + "External id": 232381,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327096999199.374, "dur": 68.343, + "args": { + "External id": 232382,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327096999201.582, "dur": 65.788, + "args": { + "External id": 232383,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999207.519, "dur": 7.332, + "args": { + "External id": 232384,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096999216.471, "dur": 50.350, + "args": { + "External id": 232385,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2070552, "tid": 2107648, + "ts": 5327096999419.135, "dur": 616.276, + "args": { + "External id": 232386,"Record function id": 0, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5327096999435.558, "dur": 586.133, + "args": { + "External id": 232387,"Record function id": 0, "Ev Idx": 962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096999494.207, "dur": 4.910, + "args": { + "External id": 232388,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096999514.346, "dur": 29.614, + "args": { + "External id": 232389,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999518.820, "dur": 1.523, + "args": { + "External id": 232390,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999521.780, "dur": 1.968, + "args": { + "External id": 232391,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999524.815, "dur": 1.909, + "args": { + "External id": 232392,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999527.759, "dur": 0.420, + "args": { + "External id": 232393,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999530.852, "dur": 0.314, + "args": { + "External id": 232394,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999532.208, "dur": 0.604, + "args": { + "External id": 232395,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999533.678, "dur": 0.433, + "args": { + "External id": 232396,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999536.219, "dur": 0.562, + "args": { + "External id": 232397,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999537.404, "dur": 0.599, + "args": { + "External id": 232398,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096999553.779, "dur": 31.032, + "args": { + "External id": 232399,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327096999613.601, "dur": 162.374, + "args": { + "External id": 232400,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327096999672.606, "dur": 4.892, + "args": { + "External id": 232401,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327096999683.121, "dur": 12.537, + "args": { + "External id": 232402,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327096999687.582, "dur": 7.680, + "args": { + "External id": 232403,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999690.813, "dur": 2.687, + "args": { + "External id": 232404,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327096999703.205, "dur": 24.849, + "args": { + "External id": 232405,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999705.296, "dur": 0.537, + "args": { + "External id": 232406,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999708.538, "dur": 0.577, + "args": { + "External id": 232407,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999709.929, "dur": 0.595, + "args": { + "External id": 232408,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999711.339, "dur": 1.895, + "args": { + "External id": 232409,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999714.266, "dur": 0.335, + "args": { + "External id": 232410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999715.359, "dur": 0.602, + "args": { + "External id": 232411,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999717.878, "dur": 0.254, + "args": { + "External id": 232412,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999719.042, "dur": 2.343, + "args": { + "External id": 232413,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327096999722.603, "dur": 0.353, + "args": { + "External id": 232414,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327096999743.046, "dur": 24.647, + "args": { + "External id": 232415,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327096999825.927, "dur": 110.538, + "args": { + "External id": 232416,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096999848.659, "dur": 84.505, + "args": { + "External id": 232417,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 992, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327096999857.942, "dur": 71.043, + "args": { + "External id": 232418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327096999951.314, "dur": 1.966, + "args": { + "External id": 232419,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 994, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097000051.285, "dur": 1655.338, + "args": { + "External id": 232420,"Sequence number": 959156, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 995 + } + }, + { + "ph": "f", "id": 20, "pid": 2070552, "tid": 2107648, "ts": 5327097000051.285, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000160.448, "dur": 105.481, + "args": { + "External id": 232421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097000303.692, "dur": 41.029, + "args": { + "External id": 232422,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000363.791, "dur": 49.933, + "args": { + "External id": 232423,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000423.627, "dur": 32.544, + "args": { + "External id": 232424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000462.070, "dur": 46.584, + "args": { + "External id": 232425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000515.223, "dur": 27.880, + "args": { + "External id": 232426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000552.687, "dur": 42.733, + "args": { + "External id": 232427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097000617.159, "dur": 62.083, + "args": { + "External id": 232428,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097000700.076, "dur": 30.665, + "args": { + "External id": 232429,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097000752.683, "dur": 21.546, + "args": { + "External id": 232430,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097000794.161, "dur": 15.604, + "args": { + "External id": 232431,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000820.257, "dur": 34.503, + "args": { + "External id": 232432,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097000857.784, "dur": 33.025, + "args": { + "External id": 232433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097000916.918, "dur": 196.656, + "args": { + "External id": 232434,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097001016.906, "dur": 6.918, + "args": { + "External id": 232435,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097001025.894, "dur": 2.408, + "args": { + "External id": 232436,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097001147.915, "dur": 25.632, + "args": { + "External id": 232437,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097001184.481, "dur": 14.632, + "args": { + "External id": 232438,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097001206.837, "dur": 39.189, + "args": { + "External id": 232439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097001253.310, "dur": 34.363, + "args": { + "External id": 232440,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097001293.944, "dur": 22.200, + "args": { + "External id": 232441,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097001320.562, "dur": 29.535, + "args": { + "External id": 232442,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097001355.317, "dur": 24.532, + "args": { + "External id": 232443,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097001387.789, "dur": 46.907, + "args": { + "External id": 232444,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097001458.915, "dur": 24.985, + "args": { + "External id": 232445,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097001501.145, "dur": 26.072, + "args": { + "External id": 232446,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097001540.487, "dur": 18.095, + "args": { + "External id": 232447,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097001572.592, "dur": 17.719, + "args": { + "External id": 232448,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097001616.012, "dur": 55.061, + "args": { + "External id": 232449,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001754.713, "dur": 17.742, + "args": { + "External id": 232450,"Record function id": 0, "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001761.344, "dur": 10.078, + "args": { + "External id": 232451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001765.386, "dur": 5.238, + "args": { + "External id": 232452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001766.681, "dur": 3.820, + "args": { + "External id": 232453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001776.146, "dur": 7.236, + "args": { + "External id": 232454,"Record function id": 0, "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001777.447, "dur": 5.487, + "args": { + "External id": 232455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001778.397, "dur": 3.789, + "args": { + "External id": 232456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001778.705, "dur": 3.394, + "args": { + "External id": 232457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001786.553, "dur": 3.852, + "args": { + "External id": 232458,"Record function id": 0, "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001787.825, "dur": 2.173, + "args": { + "External id": 232459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001788.307, "dur": 1.206, + "args": { + "External id": 232460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001788.671, "dur": 0.749, + "args": { + "External id": 232461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001793.504, "dur": 4.029, + "args": { + "External id": 232462,"Record function id": 0, "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001794.714, "dur": 2.421, + "args": { + "External id": 232463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001795.397, "dur": 1.167, + "args": { + "External id": 232464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001795.731, "dur": 0.766, + "args": { + "External id": 232465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001800.537, "dur": 3.643, + "args": { + "External id": 232466,"Record function id": 0, "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001801.606, "dur": 2.184, + "args": { + "External id": 232467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001802.074, "dur": 1.151, + "args": { + "External id": 232468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001802.489, "dur": 0.631, + "args": { + "External id": 232469,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001807.133, "dur": 3.623, + "args": { + "External id": 232470,"Record function id": 0, "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001808.103, "dur": 2.258, + "args": { + "External id": 232471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001808.541, "dur": 1.194, + "args": { + "External id": 232472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001809.011, "dur": 0.650, + "args": { + "External id": 232473,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001813.856, "dur": 3.689, + "args": { + "External id": 232474,"Record function id": 0, "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001815.088, "dur": 2.034, + "args": { + "External id": 232475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001815.703, "dur": 1.022, + "args": { + "External id": 232476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001816.015, "dur": 0.609, + "args": { + "External id": 232477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001820.514, "dur": 3.795, + "args": { + "External id": 232478,"Record function id": 0, "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001821.495, "dur": 2.434, + "args": { + "External id": 232479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001822.309, "dur": 1.217, + "args": { + "External id": 232480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001822.842, "dur": 0.610, + "args": { + "External id": 232481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001828.366, "dur": 5.773, + "args": { + "External id": 232482,"Record function id": 0, "Ev Idx": 1057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097001829.362, "dur": 4.378, + "args": { + "External id": 232483,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001829.985, "dur": 3.181, + "args": { + "External id": 232484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097001830.411, "dur": 2.691, + "args": { + "External id": 232485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097001837.936, "dur": 38267.781, + "args": { + "External id": 232486,"Record function id": 0, "Sequence number": 959155, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097001839.144, "dur": 38257.962, + "args": { + "External id": 232487,"Sequence number": 959155, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 21, "pid": 2070552, "tid": 2107648, "ts": 5327097001839.144, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5327097001868.720, "dur": 39.443, + "args": { + "External id": 232488,"Record function id": 0, "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5327097001915.745, "dur": 81.908, + "args": { + "External id": 232489,"Record function id": 0, "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5327097002005.200, "dur": 38083.738, + "args": { + "External id": 232490,"Record function id": 0, "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097002093.724, "dur": 7.095, + "args": { + "External id": 232491,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097002110.505, "dur": 5.107, + "args": { + "External id": 232492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097002129.428, "dur": 37096.748, + "args": { + "External id": 232493,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097002142.943, "dur": 37073.861, + "args": { + "External id": 232494,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097002179.563, "dur": 13.932, + "args": { + "External id": 232495,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097002199.716, "dur": 36975.177, + "args": { + "External id": 232496,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097002204.515, "dur": 36969.697, + "args": { + "External id": 232497,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097002208.310, "dur": 4.754, + "args": { + "External id": 232498,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097002215.046, "dur": 36955.555, + "args": { + "External id": 232499,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097039313.052, "dur": 8.811, + "args": { + "External id": 232500,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097039315.998, "dur": 5.499, + "args": { + "External id": 232501,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097039348.419, "dur": 427.750, + "args": { + "External id": 232502,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097039375.840, "dur": 394.825, + "args": { + "External id": 232503,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1078, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097039386.865, "dur": 377.962, + "args": { + "External id": 232504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097039799.319, "dur": 2.237, + "args": { + "External id": 232505,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1080, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097039862.445, "dur": 6.675, + "args": { + "External id": 232506,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097039910.331, "dur": 3.399, + "args": { + "External id": 232507,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097039930.065, "dur": 1.262, + "args": { + "External id": 232508,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097039943.929, "dur": 0.889, + "args": { + "External id": 232509,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097039956.605, "dur": 0.753, + "args": { + "External id": 232510,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097039967.721, "dur": 3.387, + "args": { + "External id": 232511,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097039999.548, "dur": 1.489, + "args": { + "External id": 232512,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040013.536, "dur": 1.832, + "args": { + "External id": 232513,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040025.895, "dur": 0.823, + "args": { + "External id": 232514,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097040120.270, "dur": 2767.643, + "args": { + "External id": 232515,"Record function id": 0, "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5327097040139.856, "dur": 1047.996, + "args": { + "External id": 232516,"Record function id": 0, "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5327097040154.598, "dur": 322.217, + "args": { + "External id": 232517,"Record function id": 0, "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040243.638, "dur": 6.437, + "args": { + "External id": 232518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040253.616, "dur": 0.873, + "args": { + "External id": 232519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040256.710, "dur": 0.972, + "args": { + "External id": 232520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040259.562, "dur": 0.891, + "args": { + "External id": 232521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040262.109, "dur": 1.089, + "args": { + "External id": 232522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040264.932, "dur": 0.916, + "args": { + "External id": 232523,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040267.581, "dur": 1.907, + "args": { + "External id": 232524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040271.110, "dur": 1.023, + "args": { + "External id": 232525,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040274.237, "dur": 2.893, + "args": { + "External id": 232526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097040279.215, "dur": 0.607, + "args": { + "External id": 232527,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097040298.372, "dur": 151.199, + "args": { + "External id": 232528,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097040314.527, "dur": 130.680, + "args": { + "External id": 232529,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097040329.731, "dur": 13.679, + "args": { + "External id": 232530,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097040347.971, "dur": 68.447, + "args": { + "External id": 232531,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097040352.274, "dur": 63.812, + "args": { + "External id": 232532,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040355.692, "dur": 6.970, + "args": { + "External id": 232533,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097040364.438, "dur": 51.148, + "args": { + "External id": 232534,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2070552, "tid": 2107648, + "ts": 5327097040560.044, "dur": 620.476, + "args": { + "External id": 232535,"Record function id": 0, "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5327097040575.620, "dur": 592.563, + "args": { + "External id": 232536,"Record function id": 0, "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097040674.464, "dur": 6.863, + "args": { + "External id": 232537,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097040698.465, "dur": 36.191, + "args": { + "External id": 232538,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040703.334, "dur": 1.702, + "args": { + "External id": 232539,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040712.101, "dur": 0.624, + "args": { + "External id": 232540,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040714.379, "dur": 0.559, + "args": { + "External id": 232541,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040716.656, "dur": 2.368, + "args": { + "External id": 232542,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040721.287, "dur": 0.388, + "args": { + "External id": 232543,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040723.022, "dur": 0.518, + "args": { + "External id": 232544,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040724.789, "dur": 1.309, + "args": { + "External id": 232545,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040728.062, "dur": 0.378, + "args": { + "External id": 232546,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040730.143, "dur": 0.522, + "args": { + "External id": 232547,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097040745.024, "dur": 39.380, + "args": { + "External id": 232548,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097040817.039, "dur": 106.603, + "args": { + "External id": 232549,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097040826.926, "dur": 3.798, + "args": { + "External id": 232550,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097040835.857, "dur": 10.013, + "args": { + "External id": 232551,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097040840.199, "dur": 5.195, + "args": { + "External id": 232552,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040843.625, "dur": 0.682, + "args": { + "External id": 232553,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097040852.326, "dur": 30.835, + "args": { + "External id": 232554,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040854.560, "dur": 3.087, + "args": { + "External id": 232555,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040858.941, "dur": 0.376, + "args": { + "External id": 232556,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040861.015, "dur": 0.548, + "args": { + "External id": 232557,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040864.629, "dur": 0.543, + "args": { + "External id": 232558,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040867.050, "dur": 0.402, + "args": { + "External id": 232559,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040869.045, "dur": 0.387, + "args": { + "External id": 232560,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040871.596, "dur": 0.591, + "args": { + "External id": 232561,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040873.393, "dur": 0.392, + "args": { + "External id": 232562,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097040875.734, "dur": 3.272, + "args": { + "External id": 232563,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097040895.430, "dur": 20.524, + "args": { + "External id": 232564,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097040967.533, "dur": 130.659, + "args": { + "External id": 232565,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097041007.788, "dur": 86.724, + "args": { + "External id": 232566,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1141, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097041017.417, "dur": 72.915, + "args": { + "External id": 232567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097041115.072, "dur": 1.906, + "args": { + "External id": 232568,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1143, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097041195.182, "dur": 1667.829, + "args": { + "External id": 232569,"Sequence number": 959154, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1144 + } + }, + { + "ph": "f", "id": 22, "pid": 2070552, "tid": 2107648, "ts": 5327097041195.182, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097041305.551, "dur": 102.863, + "args": { + "External id": 232570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097041447.470, "dur": 40.816, + "args": { + "External id": 232571,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097041503.881, "dur": 48.848, + "args": { + "External id": 232572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097041563.376, "dur": 32.539, + "args": { + "External id": 232573,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097041601.808, "dur": 87.317, + "args": { + "External id": 232574,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097041702.286, "dur": 32.213, + "args": { + "External id": 232575,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097041742.325, "dur": 43.210, + "args": { + "External id": 232576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097041812.802, "dur": 25.256, + "args": { + "External id": 232577,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097041862.019, "dur": 27.327, + "args": { + "External id": 232578,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097041912.453, "dur": 20.150, + "args": { + "External id": 232579,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097041945.758, "dur": 14.134, + "args": { + "External id": 232580,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097041967.285, "dur": 50.472, + "args": { + "External id": 232581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097042022.651, "dur": 34.526, + "args": { + "External id": 232582,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097042088.933, "dur": 173.721, + "args": { + "External id": 232583,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097042167.763, "dur": 6.481, + "args": { + "External id": 232584,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097042176.288, "dur": 6.520, + "args": { + "External id": 232585,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097042296.699, "dur": 26.313, + "args": { + "External id": 232586,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097042341.450, "dur": 14.620, + "args": { + "External id": 232587,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097042363.867, "dur": 37.697, + "args": { + "External id": 232588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097042407.202, "dur": 33.253, + "args": { + "External id": 232589,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097042447.315, "dur": 22.827, + "args": { + "External id": 232590,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097042476.051, "dur": 32.357, + "args": { + "External id": 232591,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097042514.407, "dur": 21.868, + "args": { + "External id": 232592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097042542.532, "dur": 46.417, + "args": { + "External id": 232593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097042614.790, "dur": 73.884, + "args": { + "External id": 232594,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097042708.457, "dur": 25.796, + "args": { + "External id": 232595,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097042748.680, "dur": 18.506, + "args": { + "External id": 232596,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097042782.931, "dur": 18.928, + "args": { + "External id": 232597,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097042814.117, "dur": 15.723, + "args": { + "External id": 232598,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042908.574, "dur": 14.632, + "args": { + "External id": 232599,"Record function id": 0, "Ev Idx": 1174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042911.648, "dur": 10.595, + "args": { + "External id": 232600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042916.054, "dur": 5.417, + "args": { + "External id": 232601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042917.527, "dur": 3.863, + "args": { + "External id": 232602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042926.879, "dur": 5.363, + "args": { + "External id": 232603,"Record function id": 0, "Ev Idx": 1178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042928.663, "dur": 3.144, + "args": { + "External id": 232604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042929.533, "dur": 1.804, + "args": { + "External id": 232605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042930.009, "dur": 1.245, + "args": { + "External id": 232606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042935.465, "dur": 4.310, + "args": { + "External id": 232607,"Record function id": 0, "Ev Idx": 1182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042936.891, "dur": 2.474, + "args": { + "External id": 232608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042937.590, "dur": 1.362, + "args": { + "External id": 232609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042938.015, "dur": 0.843, + "args": { + "External id": 232610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042942.823, "dur": 4.515, + "args": { + "External id": 232611,"Record function id": 0, "Ev Idx": 1186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042944.378, "dur": 2.534, + "args": { + "External id": 232612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042945.121, "dur": 1.374, + "args": { + "External id": 232613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042945.782, "dur": 0.642, + "args": { + "External id": 232614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042950.308, "dur": 4.116, + "args": { + "External id": 232615,"Record function id": 0, "Ev Idx": 1190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042951.563, "dur": 2.452, + "args": { + "External id": 232616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042952.041, "dur": 1.410, + "args": { + "External id": 232617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042952.657, "dur": 0.721, + "args": { + "External id": 232618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042957.409, "dur": 4.359, + "args": { + "External id": 232619,"Record function id": 0, "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042958.789, "dur": 2.541, + "args": { + "External id": 232620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042959.413, "dur": 1.372, + "args": { + "External id": 232621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042960.094, "dur": 0.615, + "args": { + "External id": 232622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042964.846, "dur": 6.854, + "args": { + "External id": 232623,"Record function id": 0, "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042965.876, "dur": 5.397, + "args": { + "External id": 232624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042967.258, "dur": 3.477, + "args": { + "External id": 232625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042967.843, "dur": 2.818, + "args": { + "External id": 232626,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042974.805, "dur": 22.062, + "args": { + "External id": 232627,"Record function id": 0, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097042990.454, "dur": 5.431, + "args": { + "External id": 232628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042992.225, "dur": 2.600, + "args": { + "External id": 232629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097042993.073, "dur": 1.493, + "args": { + "External id": 232630,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097043002.566, "dur": 4.616, + "args": { + "External id": 232631,"Record function id": 0, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097043004.281, "dur": 2.479, + "args": { + "External id": 232632,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097043004.985, "dur": 1.126, + "args": { + "External id": 232633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097043005.617, "dur": 0.424, + "args": { + "External id": 232634,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097043010.872, "dur": 37295.476, + "args": { + "External id": 232635,"Record function id": 0, "Sequence number": 959153, "Fwd thread id": 1, "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097043012.158, "dur": 37285.561, + "args": { + "External id": 232636,"Sequence number": 959153, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1211 + } + }, + { + "ph": "f", "id": 23, "pid": 2070552, "tid": 2107648, "ts": 5327097043012.158, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5327097043039.914, "dur": 37.088, + "args": { + "External id": 232637,"Record function id": 0, "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5327097043084.467, "dur": 66.281, + "args": { + "External id": 232638,"Record function id": 0, "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5327097043156.656, "dur": 37133.497, + "args": { + "External id": 232639,"Record function id": 0, "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097043243.379, "dur": 6.622, + "args": { + "External id": 232640,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097043259.139, "dur": 4.861, + "args": { + "External id": 232641,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097043277.671, "dur": 36185.076, + "args": { + "External id": 232642,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097043291.193, "dur": 36162.335, + "args": { + "External id": 232643,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097043329.368, "dur": 13.566, + "args": { + "External id": 232644,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097043349.072, "dur": 36066.895, + "args": { + "External id": 232645,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097043351.839, "dur": 36063.411, + "args": { + "External id": 232646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097043355.750, "dur": 5.051, + "args": { + "External id": 232647,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097043362.410, "dur": 36049.650, + "args": { + "External id": 232648,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097079549.565, "dur": 8.622, + "args": { + "External id": 232649,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097079552.064, "dur": 5.822, + "args": { + "External id": 232650,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097079584.581, "dur": 413.289, + "args": { + "External id": 232651,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097079612.380, "dur": 380.201, + "args": { + "External id": 232652,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1227, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097079646.990, "dur": 327.778, + "args": { + "External id": 232653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097080020.991, "dur": 2.787, + "args": { + "External id": 232654,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1229, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080085.214, "dur": 8.355, + "args": { + "External id": 232655,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080136.327, "dur": 1.478, + "args": { + "External id": 232656,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080153.062, "dur": 1.510, + "args": { + "External id": 232657,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080166.646, "dur": 0.986, + "args": { + "External id": 232658,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080180.105, "dur": 3.237, + "args": { + "External id": 232659,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080193.661, "dur": 1.051, + "args": { + "External id": 232660,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080205.124, "dur": 0.993, + "args": { + "External id": 232661,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080217.890, "dur": 1.787, + "args": { + "External id": 232662,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080230.351, "dur": 2.771, + "args": { + "External id": 232663,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097080319.732, "dur": 2757.113, + "args": { + "External id": 232664,"Record function id": 0, "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5327097080338.548, "dur": 1041.134, + "args": { + "External id": 232665,"Record function id": 0, "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5327097080353.472, "dur": 350.420, + "args": { + "External id": 232666,"Record function id": 0, "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080436.194, "dur": 4.173, + "args": { + "External id": 232667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080443.597, "dur": 1.337, + "args": { + "External id": 232668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080446.887, "dur": 0.770, + "args": { + "External id": 232669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080449.596, "dur": 1.006, + "args": { + "External id": 232670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080452.391, "dur": 0.719, + "args": { + "External id": 232671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080455.109, "dur": 0.883, + "args": { + "External id": 232672,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080457.509, "dur": 4.385, + "args": { + "External id": 232673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080463.480, "dur": 0.964, + "args": { + "External id": 232674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080466.228, "dur": 0.999, + "args": { + "External id": 232675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097080468.909, "dur": 1.158, + "args": { + "External id": 232676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097080488.541, "dur": 183.855, + "args": { + "External id": 232677,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097080504.026, "dur": 163.049, + "args": { + "External id": 232678,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097080517.612, "dur": 11.853, + "args": { + "External id": 232679,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097080533.978, "dur": 66.195, + "args": { + "External id": 232680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097080536.336, "dur": 63.532, + "args": { + "External id": 232681,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080541.673, "dur": 5.769, + "args": { + "External id": 232682,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097080549.178, "dur": 50.136, + "args": { + "External id": 232683,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2070552, "tid": 2107648, + "ts": 5327097080796.255, "dur": 575.627, + "args": { + "External id": 232684,"Record function id": 0, "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5327097080812.656, "dur": 547.160, + "args": { + "External id": 232685,"Record function id": 0, "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097080876.777, "dur": 5.557, + "args": { + "External id": 232686,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097080897.837, "dur": 35.184, + "args": { + "External id": 232687,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080902.542, "dur": 1.581, + "args": { + "External id": 232688,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080906.528, "dur": 4.402, + "args": { + "External id": 232689,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080912.241, "dur": 0.748, + "args": { + "External id": 232690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080915.116, "dur": 0.529, + "args": { + "External id": 232691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080918.732, "dur": 0.607, + "args": { + "External id": 232692,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080921.032, "dur": 0.635, + "args": { + "External id": 232693,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080922.895, "dur": 0.557, + "args": { + "External id": 232694,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080925.389, "dur": 0.527, + "args": { + "External id": 232695,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097080927.250, "dur": 0.536, + "args": { + "External id": 232696,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097080943.452, "dur": 47.192, + "args": { + "External id": 232697,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097081024.659, "dur": 112.554, + "args": { + "External id": 232698,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097081035.569, "dur": 6.530, + "args": { + "External id": 232699,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097081046.892, "dur": 10.710, + "args": { + "External id": 232700,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097081051.444, "dur": 5.735, + "args": { + "External id": 232701,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081054.928, "dur": 0.719, + "args": { + "External id": 232702,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097081065.151, "dur": 29.022, + "args": { + "External id": 232703,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081068.111, "dur": 0.674, + "args": { + "External id": 232704,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081071.086, "dur": 0.547, + "args": { + "External id": 232705,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081073.093, "dur": 0.648, + "args": { + "External id": 232706,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081075.033, "dur": 1.628, + "args": { + "External id": 232707,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081077.959, "dur": 0.766, + "args": { + "External id": 232708,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081080.392, "dur": 0.587, + "args": { + "External id": 232709,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081083.667, "dur": 2.115, + "args": { + "External id": 232710,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081087.118, "dur": 0.353, + "args": { + "External id": 232711,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097081088.954, "dur": 0.455, + "args": { + "External id": 232712,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097081104.844, "dur": 24.738, + "args": { + "External id": 232713,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097081181.550, "dur": 112.946, + "args": { + "External id": 232714,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097081208.128, "dur": 83.030, + "args": { + "External id": 232715,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1290, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097081217.204, "dur": 68.893, + "args": { + "External id": 232716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097081310.023, "dur": 2.047, + "args": { + "External id": 232717,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1292, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097081387.540, "dur": 1664.283, + "args": { + "External id": 232718,"Sequence number": 959152, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1293 + } + }, + { + "ph": "f", "id": 24, "pid": 2070552, "tid": 2107648, "ts": 5327097081387.540, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097081495.066, "dur": 104.742, + "args": { + "External id": 232719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097081680.284, "dur": 45.409, + "args": { + "External id": 232720,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097081746.280, "dur": 56.775, + "args": { + "External id": 232721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097081812.974, "dur": 32.102, + "args": { + "External id": 232722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097081854.284, "dur": 45.265, + "args": { + "External id": 232723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097081911.998, "dur": 27.489, + "args": { + "External id": 232724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097081948.588, "dur": 56.330, + "args": { + "External id": 232725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097082030.467, "dur": 24.083, + "args": { + "External id": 232726,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097082071.730, "dur": 27.073, + "args": { + "External id": 232727,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097082118.138, "dur": 19.623, + "args": { + "External id": 232728,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097082151.067, "dur": 14.736, + "args": { + "External id": 232729,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082174.792, "dur": 32.196, + "args": { + "External id": 232730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082209.878, "dur": 32.912, + "args": { + "External id": 232731,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097082267.570, "dur": 177.954, + "args": { + "External id": 232732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097082346.688, "dur": 6.512, + "args": { + "External id": 232733,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097082355.349, "dur": 3.931, + "args": { + "External id": 232734,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097082476.418, "dur": 27.636, + "args": { + "External id": 232735,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097082515.337, "dur": 14.345, + "args": { + "External id": 232736,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082536.543, "dur": 37.083, + "args": { + "External id": 232737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082578.235, "dur": 33.372, + "args": { + "External id": 232738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082617.839, "dur": 62.806, + "args": { + "External id": 232739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082690.544, "dur": 31.668, + "args": { + "External id": 232740,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082728.374, "dur": 31.258, + "args": { + "External id": 232741,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097082772.371, "dur": 40.409, + "args": { + "External id": 232742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097082837.034, "dur": 24.241, + "args": { + "External id": 232743,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097082880.712, "dur": 25.222, + "args": { + "External id": 232744,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097082920.921, "dur": 20.325, + "args": { + "External id": 232745,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097082954.936, "dur": 16.483, + "args": { + "External id": 232746,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097082999.992, "dur": 19.004, + "args": { + "External id": 232747,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083098.255, "dur": 14.749, + "args": { + "External id": 232748,"Record function id": 0, "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083101.702, "dur": 10.344, + "args": { + "External id": 232749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083106.164, "dur": 5.167, + "args": { + "External id": 232750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083107.356, "dur": 3.863, + "args": { + "External id": 232751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083116.616, "dur": 4.853, + "args": { + "External id": 232752,"Record function id": 0, "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083117.882, "dur": 3.097, + "args": { + "External id": 232753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083118.697, "dur": 1.514, + "args": { + "External id": 232754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083119.204, "dur": 0.915, + "args": { + "External id": 232755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083124.710, "dur": 4.088, + "args": { + "External id": 232756,"Record function id": 0, "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083125.994, "dur": 2.397, + "args": { + "External id": 232757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083126.642, "dur": 1.104, + "args": { + "External id": 232758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083127.034, "dur": 0.622, + "args": { + "External id": 232759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083131.889, "dur": 4.253, + "args": { + "External id": 232760,"Record function id": 0, "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083133.054, "dur": 2.643, + "args": { + "External id": 232761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083133.561, "dur": 1.722, + "args": { + "External id": 232762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083134.597, "dur": 0.623, + "args": { + "External id": 232763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083139.267, "dur": 4.122, + "args": { + "External id": 232764,"Record function id": 0, "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083140.550, "dur": 2.429, + "args": { + "External id": 232765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083141.150, "dur": 1.266, + "args": { + "External id": 232766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083141.700, "dur": 0.640, + "args": { + "External id": 232767,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083146.476, "dur": 4.846, + "args": { + "External id": 232768,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083147.729, "dur": 3.192, + "args": { + "External id": 232769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083148.382, "dur": 1.870, + "args": { + "External id": 232770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083148.967, "dur": 1.210, + "args": { + "External id": 232771,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083154.687, "dur": 7.507, + "args": { + "External id": 232772,"Record function id": 0, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083156.462, "dur": 5.295, + "args": { + "External id": 232773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083157.206, "dur": 3.871, + "args": { + "External id": 232774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083158.186, "dur": 2.817, + "args": { + "External id": 232775,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083165.450, "dur": 4.287, + "args": { + "External id": 232776,"Record function id": 0, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083166.859, "dur": 2.462, + "args": { + "External id": 232777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083167.326, "dur": 1.558, + "args": { + "External id": 232778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083167.806, "dur": 1.014, + "args": { + "External id": 232779,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083172.816, "dur": 4.763, + "args": { + "External id": 232780,"Record function id": 0, "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097083174.473, "dur": 2.692, + "args": { + "External id": 232781,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083175.267, "dur": 1.470, + "args": { + "External id": 232782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097083175.976, "dur": 0.687, + "args": { + "External id": 232783,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097083181.186, "dur": 37059.721, + "args": { + "External id": 232784,"Record function id": 0, "Sequence number": 959151, "Fwd thread id": 1, "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097083182.827, "dur": 37050.053, + "args": { + "External id": 232785,"Sequence number": 959151, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1360 + } + }, + { + "ph": "f", "id": 25, "pid": 2070552, "tid": 2107648, "ts": 5327097083182.827, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5327097083212.765, "dur": 37.356, + "args": { + "External id": 232786,"Record function id": 0, "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5327097083257.859, "dur": 72.034, + "args": { + "External id": 232787,"Record function id": 0, "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5327097083336.031, "dur": 36889.598, + "args": { + "External id": 232788,"Record function id": 0, "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097083421.864, "dur": 6.473, + "args": { + "External id": 232789,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097083437.672, "dur": 4.679, + "args": { + "External id": 232790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097083459.032, "dur": 35901.437, + "args": { + "External id": 232791,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097083473.030, "dur": 35878.423, + "args": { + "External id": 232792,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097083516.470, "dur": 14.316, + "args": { + "External id": 232793,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097083537.134, "dur": 35774.885, + "args": { + "External id": 232794,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097083539.327, "dur": 35772.011, + "args": { + "External id": 232795,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097083543.894, "dur": 5.517, + "args": { + "External id": 232796,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097083550.928, "dur": 35757.052, + "args": { + "External id": 232797,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097119447.820, "dur": 9.249, + "args": { + "External id": 232798,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097119450.577, "dur": 6.140, + "args": { + "External id": 232799,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097119482.290, "dur": 434.830, + "args": { + "External id": 232800,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097119508.685, "dur": 403.171, + "args": { + "External id": 232801,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1376, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097119519.475, "dur": 386.786, + "args": { + "External id": 232802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097119940.281, "dur": 2.357, + "args": { + "External id": 232803,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1378, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120021.304, "dur": 8.183, + "args": { + "External id": 232804,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120072.402, "dur": 1.502, + "args": { + "External id": 232805,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120088.299, "dur": 1.696, + "args": { + "External id": 232806,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120100.932, "dur": 0.982, + "args": { + "External id": 232807,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120112.657, "dur": 3.198, + "args": { + "External id": 232808,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120126.694, "dur": 0.869, + "args": { + "External id": 232809,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120137.377, "dur": 1.223, + "args": { + "External id": 232810,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120149.155, "dur": 2.711, + "args": { + "External id": 232811,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120161.591, "dur": 3.350, + "args": { + "External id": 232812,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097120255.385, "dur": 2785.251, + "args": { + "External id": 232813,"Record function id": 0, "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5327097120274.486, "dur": 1042.457, + "args": { + "External id": 232814,"Record function id": 0, "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5327097120290.292, "dur": 300.292, + "args": { + "External id": 232815,"Record function id": 0, "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120370.118, "dur": 3.922, + "args": { + "External id": 232816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120377.170, "dur": 1.102, + "args": { + "External id": 232817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120380.266, "dur": 0.831, + "args": { + "External id": 232818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120382.714, "dur": 1.114, + "args": { + "External id": 232819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120385.494, "dur": 0.836, + "args": { + "External id": 232820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120387.821, "dur": 1.049, + "args": { + "External id": 232821,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120390.440, "dur": 3.818, + "args": { + "External id": 232822,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120395.565, "dur": 0.751, + "args": { + "External id": 232823,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120397.799, "dur": 1.322, + "args": { + "External id": 232824,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097120400.538, "dur": 0.966, + "args": { + "External id": 232825,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097120419.384, "dur": 143.814, + "args": { + "External id": 232826,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097120435.016, "dur": 124.059, + "args": { + "External id": 232827,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097120448.874, "dur": 12.794, + "args": { + "External id": 232828,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097120465.461, "dur": 65.845, + "args": { + "External id": 232829,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097120467.805, "dur": 63.249, + "args": { + "External id": 232830,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120472.467, "dur": 5.614, + "args": { + "External id": 232831,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097120479.799, "dur": 50.605, + "args": { + "External id": 232832,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2070552, "tid": 2107648, + "ts": 5327097120732.111, "dur": 577.294, + "args": { + "External id": 232833,"Record function id": 0, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5327097120751.347, "dur": 545.824, + "args": { + "External id": 232834,"Record function id": 0, "Ev Idx": 1409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097120816.330, "dur": 6.174, + "args": { + "External id": 232835,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097120841.866, "dur": 36.114, + "args": { + "External id": 232836,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120847.028, "dur": 1.603, + "args": { + "External id": 232837,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120851.331, "dur": 4.189, + "args": { + "External id": 232838,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120856.965, "dur": 0.368, + "args": { + "External id": 232839,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120858.668, "dur": 1.072, + "args": { + "External id": 232840,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120862.473, "dur": 0.300, + "args": { + "External id": 232841,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120864.058, "dur": 0.526, + "args": { + "External id": 232842,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120866.337, "dur": 0.545, + "args": { + "External id": 232843,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120869.790, "dur": 0.989, + "args": { + "External id": 232844,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120872.410, "dur": 0.789, + "args": { + "External id": 232845,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097120888.934, "dur": 36.460, + "args": { + "External id": 232846,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097120953.730, "dur": 128.054, + "args": { + "External id": 232847,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097120963.737, "dur": 5.745, + "args": { + "External id": 232848,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097120974.077, "dur": 26.525, + "args": { + "External id": 232849,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097120993.369, "dur": 6.737, + "args": { + "External id": 232850,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097120997.465, "dur": 0.946, + "args": { + "External id": 232851,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097121008.608, "dur": 30.247, + "args": { + "External id": 232852,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121011.103, "dur": 0.565, + "args": { + "External id": 232853,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121013.658, "dur": 1.505, + "args": { + "External id": 232854,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121016.708, "dur": 0.788, + "args": { + "External id": 232855,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121019.099, "dur": 0.569, + "args": { + "External id": 232856,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121021.699, "dur": 0.906, + "args": { + "External id": 232857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121023.670, "dur": 0.504, + "args": { + "External id": 232858,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121025.601, "dur": 2.355, + "args": { + "External id": 232859,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121030.805, "dur": 0.674, + "args": { + "External id": 232860,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097121032.705, "dur": 0.580, + "args": { + "External id": 232861,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097121051.137, "dur": 22.757, + "args": { + "External id": 232862,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097121126.648, "dur": 109.471, + "args": { + "External id": 232863,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097121148.614, "dur": 83.934, + "args": { + "External id": 232864,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1439, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097121156.940, "dur": 71.539, + "args": { + "External id": 232865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097121247.384, "dur": 1.917, + "args": { + "External id": 232866,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1441, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097121323.921, "dur": 1691.696, + "args": { + "External id": 232867,"Sequence number": 959150, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1442 + } + }, + { + "ph": "f", "id": 26, "pid": 2070552, "tid": 2107648, "ts": 5327097121323.921, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097121431.495, "dur": 103.560, + "args": { + "External id": 232868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097121571.725, "dur": 41.634, + "args": { + "External id": 232869,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097121673.422, "dur": 58.212, + "args": { + "External id": 232870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097121742.839, "dur": 33.875, + "args": { + "External id": 232871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097121783.683, "dur": 45.897, + "args": { + "External id": 232872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097121837.135, "dur": 28.967, + "args": { + "External id": 232873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097121875.810, "dur": 43.162, + "args": { + "External id": 232874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097121943.769, "dur": 24.476, + "args": { + "External id": 232875,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097121999.546, "dur": 31.035, + "args": { + "External id": 232876,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097122052.660, "dur": 21.115, + "args": { + "External id": 232877,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097122084.557, "dur": 15.882, + "args": { + "External id": 232878,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122110.284, "dur": 32.844, + "args": { + "External id": 232879,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122146.697, "dur": 32.630, + "args": { + "External id": 232880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097122206.910, "dur": 168.064, + "args": { + "External id": 232881,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097122283.843, "dur": 5.144, + "args": { + "External id": 232882,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097122290.926, "dur": 3.219, + "args": { + "External id": 232883,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097122411.012, "dur": 29.573, + "args": { + "External id": 232884,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097122454.094, "dur": 15.469, + "args": { + "External id": 232885,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122476.581, "dur": 37.163, + "args": { + "External id": 232886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122519.099, "dur": 35.558, + "args": { + "External id": 232887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122561.629, "dur": 22.600, + "args": { + "External id": 232888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122590.404, "dur": 79.732, + "args": { + "External id": 232889,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122681.076, "dur": 40.377, + "args": { + "External id": 232890,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097122734.721, "dur": 37.837, + "args": { + "External id": 232891,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097122796.747, "dur": 26.274, + "args": { + "External id": 232892,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097122843.992, "dur": 25.470, + "args": { + "External id": 232893,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097122885.650, "dur": 21.634, + "args": { + "External id": 232894,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097122921.923, "dur": 16.638, + "args": { + "External id": 232895,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097122952.704, "dur": 15.383, + "args": { + "External id": 232896,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123061.917, "dur": 17.642, + "args": { + "External id": 232897,"Record function id": 0, "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123064.893, "dur": 13.501, + "args": { + "External id": 232898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123069.116, "dur": 8.363, + "args": { + "External id": 232899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123070.428, "dur": 6.967, + "args": { + "External id": 232900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123083.306, "dur": 4.400, + "args": { + "External id": 232901,"Record function id": 0, "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123084.518, "dur": 2.786, + "args": { + "External id": 232902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123085.395, "dur": 1.458, + "args": { + "External id": 232903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123085.832, "dur": 0.947, + "args": { + "External id": 232904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123090.819, "dur": 4.437, + "args": { + "External id": 232905,"Record function id": 0, "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123091.951, "dur": 2.888, + "args": { + "External id": 232906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123092.616, "dur": 1.548, + "args": { + "External id": 232907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123093.244, "dur": 0.831, + "args": { + "External id": 232908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123098.400, "dur": 4.708, + "args": { + "External id": 232909,"Record function id": 0, "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123099.872, "dur": 2.815, + "args": { + "External id": 232910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123100.745, "dur": 1.517, + "args": { + "External id": 232911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123101.248, "dur": 0.945, + "args": { + "External id": 232912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123106.158, "dur": 3.785, + "args": { + "External id": 232913,"Record function id": 0, "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123107.312, "dur": 2.226, + "args": { + "External id": 232914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123107.807, "dur": 1.333, + "args": { + "External id": 232915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123108.417, "dur": 0.650, + "args": { + "External id": 232916,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123112.963, "dur": 4.801, + "args": { + "External id": 232917,"Record function id": 0, "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123114.197, "dur": 3.140, + "args": { + "External id": 232918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123115.013, "dur": 1.757, + "args": { + "External id": 232919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123115.752, "dur": 0.947, + "args": { + "External id": 232920,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123120.993, "dur": 6.783, + "args": { + "External id": 232921,"Record function id": 0, "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123122.073, "dur": 5.281, + "args": { + "External id": 232922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123122.800, "dur": 3.922, + "args": { + "External id": 232923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123123.833, "dur": 2.819, + "args": { + "External id": 232924,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123131.123, "dur": 7.145, + "args": { + "External id": 232925,"Record function id": 0, "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123135.099, "dur": 2.734, + "args": { + "External id": 232926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123135.576, "dur": 1.866, + "args": { + "External id": 232927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123136.247, "dur": 1.123, + "args": { + "External id": 232928,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123141.713, "dur": 4.051, + "args": { + "External id": 232929,"Record function id": 0, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097123142.658, "dur": 2.702, + "args": { + "External id": 232930,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123143.348, "dur": 1.623, + "args": { + "External id": 232931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097123144.246, "dur": 0.649, + "args": { + "External id": 232932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097123149.998, "dur": 36101.477, + "args": { + "External id": 232933,"Record function id": 0, "Sequence number": 959149, "Fwd thread id": 1, "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097123151.318, "dur": 36091.790, + "args": { + "External id": 232934,"Sequence number": 959149, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1509 + } + }, + { + "ph": "f", "id": 27, "pid": 2070552, "tid": 2107648, "ts": 5327097123151.318, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5327097123178.871, "dur": 42.133, + "args": { + "External id": 232935,"Record function id": 0, "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5327097123228.631, "dur": 68.672, + "args": { + "External id": 232936,"Record function id": 0, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5327097123303.647, "dur": 35932.262, + "args": { + "External id": 232937,"Record function id": 0, "Ev Idx": 1512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097123391.217, "dur": 6.322, + "args": { + "External id": 232938,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097123407.255, "dur": 4.491, + "args": { + "External id": 232939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097123426.376, "dur": 34973.622, + "args": { + "External id": 232940,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097123439.333, "dur": 34951.653, + "args": { + "External id": 232941,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097123473.866, "dur": 13.798, + "args": { + "External id": 232942,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097123493.570, "dur": 34860.749, + "args": { + "External id": 232943,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097123495.726, "dur": 34857.728, + "args": { + "External id": 232944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097123499.450, "dur": 5.970, + "args": { + "External id": 232945,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097123506.950, "dur": 34842.970, + "args": { + "External id": 232946,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097158485.358, "dur": 8.579, + "args": { + "External id": 232947,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097158488.123, "dur": 5.481, + "args": { + "External id": 232948,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097158521.281, "dur": 398.198, + "args": { + "External id": 232949,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097158548.014, "dur": 366.668, + "args": { + "External id": 232950,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1525, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097158559.452, "dur": 349.501, + "args": { + "External id": 232951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097158940.911, "dur": 2.491, + "args": { + "External id": 232952,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1527, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159019.244, "dur": 7.851, + "args": { + "External id": 232953,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159070.813, "dur": 1.406, + "args": { + "External id": 232954,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159087.129, "dur": 1.730, + "args": { + "External id": 232955,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159101.368, "dur": 0.980, + "args": { + "External id": 232956,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159114.314, "dur": 2.951, + "args": { + "External id": 232957,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159126.599, "dur": 0.958, + "args": { + "External id": 232958,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159137.863, "dur": 0.835, + "args": { + "External id": 232959,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159148.906, "dur": 2.404, + "args": { + "External id": 232960,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159173.836, "dur": 2.788, + "args": { + "External id": 232961,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097159266.244, "dur": 2767.658, + "args": { + "External id": 232962,"Record function id": 0, "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5327097159285.312, "dur": 1027.306, + "args": { + "External id": 232963,"Record function id": 0, "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5327097159300.495, "dur": 312.282, + "args": { + "External id": 232964,"Record function id": 0, "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159383.275, "dur": 3.984, + "args": { + "External id": 232965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159390.671, "dur": 0.719, + "args": { + "External id": 232966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159393.511, "dur": 0.906, + "args": { + "External id": 232967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159396.050, "dur": 1.167, + "args": { + "External id": 232968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159398.779, "dur": 0.772, + "args": { + "External id": 232969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159401.573, "dur": 0.762, + "args": { + "External id": 232970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159404.097, "dur": 3.633, + "args": { + "External id": 232971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159409.428, "dur": 1.028, + "args": { + "External id": 232972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159412.021, "dur": 0.675, + "args": { + "External id": 232973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097159414.577, "dur": 0.990, + "args": { + "External id": 232974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097159434.649, "dur": 150.334, + "args": { + "External id": 232975,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097159450.900, "dur": 129.830, + "args": { + "External id": 232976,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097159466.999, "dur": 13.661, + "args": { + "External id": 232977,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097159484.225, "dur": 67.368, + "args": { + "External id": 232978,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097159486.458, "dur": 64.863, + "args": { + "External id": 232979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159490.160, "dur": 6.386, + "args": { + "External id": 232980,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097159498.131, "dur": 52.626, + "args": { + "External id": 232981,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2070552, "tid": 2107648, + "ts": 5327097159745.803, "dur": 558.947, + "args": { + "External id": 232982,"Record function id": 0, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5327097159764.090, "dur": 528.085, + "args": { + "External id": 232983,"Record function id": 0, "Ev Idx": 1558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097159824.529, "dur": 5.916, + "args": { + "External id": 232984,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097159847.353, "dur": 30.009, + "args": { + "External id": 232985,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159851.653, "dur": 1.690, + "args": { + "External id": 232986,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159855.418, "dur": 3.991, + "args": { + "External id": 232987,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159860.351, "dur": 0.559, + "args": { + "External id": 232988,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159862.351, "dur": 0.352, + "args": { + "External id": 232989,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159864.868, "dur": 0.371, + "args": { + "External id": 232990,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159866.483, "dur": 0.667, + "args": { + "External id": 232991,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159868.117, "dur": 0.246, + "args": { + "External id": 232992,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159870.253, "dur": 0.367, + "args": { + "External id": 232993,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159871.397, "dur": 0.382, + "args": { + "External id": 232994,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097159887.080, "dur": 33.423, + "args": { + "External id": 232995,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097159950.427, "dur": 117.246, + "args": { + "External id": 232996,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097159959.712, "dur": 5.518, + "args": { + "External id": 232997,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097159969.685, "dur": 26.767, + "args": { + "External id": 232998,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097159973.718, "dur": 22.223, + "args": { + "External id": 232999,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097159992.578, "dur": 0.953, + "args": { + "External id": 233000,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097160004.609, "dur": 23.460, + "args": { + "External id": 233001,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160006.408, "dur": 0.446, + "args": { + "External id": 233002,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160009.405, "dur": 0.391, + "args": { + "External id": 233003,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160010.823, "dur": 0.352, + "args": { + "External id": 233004,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160012.587, "dur": 0.977, + "args": { + "External id": 233005,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160014.384, "dur": 0.379, + "args": { + "External id": 233006,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160015.837, "dur": 0.402, + "args": { + "External id": 233007,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160018.200, "dur": 2.291, + "args": { + "External id": 233008,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160021.528, "dur": 0.710, + "args": { + "External id": 233009,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097160023.147, "dur": 0.361, + "args": { + "External id": 233010,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097160038.223, "dur": 22.108, + "args": { + "External id": 233011,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097160113.332, "dur": 113.869, + "args": { + "External id": 233012,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097160138.845, "dur": 85.000, + "args": { + "External id": 233013,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1588, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097160148.139, "dur": 71.349, + "args": { + "External id": 233014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097160242.490, "dur": 1.577, + "args": { + "External id": 233015,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1590, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097160320.450, "dur": 1686.387, + "args": { + "External id": 233016,"Sequence number": 959148, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1591 + } + }, + { + "ph": "f", "id": 28, "pid": 2070552, "tid": 2107648, "ts": 5327097160320.450, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097160427.432, "dur": 102.532, + "args": { + "External id": 233017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097160570.046, "dur": 42.133, + "args": { + "External id": 233018,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097160673.348, "dur": 58.583, + "args": { + "External id": 233019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097160742.555, "dur": 33.087, + "args": { + "External id": 233020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097160785.341, "dur": 45.154, + "args": { + "External id": 233021,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097160841.978, "dur": 28.189, + "args": { + "External id": 233022,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097160880.172, "dur": 42.919, + "args": { + "External id": 233023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097160949.842, "dur": 24.480, + "args": { + "External id": 233024,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097161010.854, "dur": 29.389, + "args": { + "External id": 233025,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097161065.409, "dur": 20.704, + "args": { + "External id": 233026,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097161101.118, "dur": 14.693, + "args": { + "External id": 233027,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161125.808, "dur": 33.019, + "args": { + "External id": 233028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161162.125, "dur": 33.352, + "args": { + "External id": 233029,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097161222.410, "dur": 169.415, + "args": { + "External id": 233030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097161299.408, "dur": 6.263, + "args": { + "External id": 233031,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097161307.430, "dur": 4.072, + "args": { + "External id": 233032,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097161431.103, "dur": 25.408, + "args": { + "External id": 233033,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097161470.054, "dur": 14.738, + "args": { + "External id": 233034,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161492.842, "dur": 34.699, + "args": { + "External id": 233035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161532.493, "dur": 35.071, + "args": { + "External id": 233036,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161575.202, "dur": 22.046, + "args": { + "External id": 233037,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161603.420, "dur": 67.387, + "args": { + "External id": 233038,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161681.281, "dur": 24.921, + "args": { + "External id": 233039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097161712.945, "dur": 49.344, + "args": { + "External id": 233040,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097161790.748, "dur": 25.932, + "args": { + "External id": 233041,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097161836.803, "dur": 30.105, + "args": { + "External id": 233042,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097161881.806, "dur": 20.188, + "args": { + "External id": 233043,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097161917.806, "dur": 15.353, + "args": { + "External id": 233044,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097161944.659, "dur": 15.632, + "args": { + "External id": 233045,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162055.598, "dur": 14.126, + "args": { + "External id": 233046,"Record function id": 0, "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162058.582, "dur": 10.251, + "args": { + "External id": 233047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162062.632, "dur": 5.266, + "args": { + "External id": 233048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162063.744, "dur": 4.041, + "args": { + "External id": 233049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162073.497, "dur": 4.600, + "args": { + "External id": 233050,"Record function id": 0, "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162074.704, "dur": 2.879, + "args": { + "External id": 233051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162075.839, "dur": 1.190, + "args": { + "External id": 233052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162076.213, "dur": 0.725, + "args": { + "External id": 233053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162081.267, "dur": 4.230, + "args": { + "External id": 233054,"Record function id": 0, "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162082.950, "dur": 2.146, + "args": { + "External id": 233055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162083.516, "dur": 1.131, + "args": { + "External id": 233056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162083.893, "dur": 0.665, + "args": { + "External id": 233057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162088.811, "dur": 4.088, + "args": { + "External id": 233058,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162089.913, "dur": 2.587, + "args": { + "External id": 233059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162090.550, "dur": 1.274, + "args": { + "External id": 233060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162091.126, "dur": 0.623, + "args": { + "External id": 233061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162096.243, "dur": 4.743, + "args": { + "External id": 233062,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162097.428, "dur": 3.116, + "args": { + "External id": 233063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162098.381, "dur": 1.476, + "args": { + "External id": 233064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162099.081, "dur": 0.701, + "args": { + "External id": 233065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162104.314, "dur": 4.012, + "args": { + "External id": 233066,"Record function id": 0, "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162105.210, "dur": 2.705, + "args": { + "External id": 233067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162105.649, "dur": 1.850, + "args": { + "External id": 233068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162106.510, "dur": 0.921, + "args": { + "External id": 233069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162111.602, "dur": 6.744, + "args": { + "External id": 233070,"Record function id": 0, "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162113.026, "dur": 4.918, + "args": { + "External id": 233071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162113.635, "dur": 3.876, + "args": { + "External id": 233072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162114.182, "dur": 3.261, + "args": { + "External id": 233073,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162121.748, "dur": 4.324, + "args": { + "External id": 233074,"Record function id": 0, "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162122.654, "dur": 2.999, + "args": { + "External id": 233075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162123.299, "dur": 1.956, + "args": { + "External id": 233076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162124.053, "dur": 1.128, + "args": { + "External id": 233077,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162129.395, "dur": 4.439, + "args": { + "External id": 233078,"Record function id": 0, "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097162130.944, "dur": 2.478, + "args": { + "External id": 233079,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162131.407, "dur": 1.504, + "args": { + "External id": 233080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097162132.138, "dur": 0.702, + "args": { + "External id": 233081,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097162137.974, "dur": 36469.771, + "args": { + "External id": 233082,"Record function id": 0, "Sequence number": 959147, "Fwd thread id": 1, "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097162139.145, "dur": 36459.359, + "args": { + "External id": 233083,"Sequence number": 959147, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1658 + } + }, + { + "ph": "f", "id": 29, "pid": 2070552, "tid": 2107648, "ts": 5327097162139.145, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5327097162167.554, "dur": 39.291, + "args": { + "External id": 233084,"Record function id": 0, "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5327097162214.223, "dur": 66.072, + "args": { + "External id": 233085,"Record function id": 0, "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5327097162285.801, "dur": 36305.269, + "args": { + "External id": 233086,"Record function id": 0, "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097162374.129, "dur": 6.362, + "args": { + "External id": 233087,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097162389.516, "dur": 4.312, + "args": { + "External id": 233088,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097162409.976, "dur": 35292.981, + "args": { + "External id": 233089,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097162423.510, "dur": 35270.435, + "args": { + "External id": 233090,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097162472.667, "dur": 16.696, + "args": { + "External id": 233091,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097162495.341, "dur": 35162.406, + "args": { + "External id": 233092,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097162497.672, "dur": 35159.307, + "args": { + "External id": 233093,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097162503.398, "dur": 4.986, + "args": { + "External id": 233094,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097162510.020, "dur": 35143.447, + "args": { + "External id": 233095,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097197787.999, "dur": 8.312, + "args": { + "External id": 233096,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097197790.708, "dur": 5.232, + "args": { + "External id": 233097,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097197823.643, "dur": 480.438, + "args": { + "External id": 233098,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097197850.448, "dur": 448.411, + "args": { + "External id": 233099,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1674, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097197862.254, "dur": 430.809, + "args": { + "External id": 233100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097198326.293, "dur": 2.283, + "args": { + "External id": 233101,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1676, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198389.622, "dur": 8.674, + "args": { + "External id": 233102,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198440.331, "dur": 1.255, + "args": { + "External id": 233103,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198457.295, "dur": 1.114, + "args": { + "External id": 233104,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198470.368, "dur": 0.613, + "args": { + "External id": 233105,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198482.528, "dur": 2.894, + "args": { + "External id": 233106,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198495.714, "dur": 0.819, + "args": { + "External id": 233107,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198506.840, "dur": 0.882, + "args": { + "External id": 233108,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198518.927, "dur": 2.055, + "args": { + "External id": 233109,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198531.798, "dur": 3.008, + "args": { + "External id": 233110,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097198652.549, "dur": 2693.986, + "args": { + "External id": 233111,"Record function id": 0, "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5327097198675.679, "dur": 1031.783, + "args": { + "External id": 233112,"Record function id": 0, "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5327097198688.812, "dur": 331.646, + "args": { + "External id": 233113,"Record function id": 0, "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198773.659, "dur": 4.881, + "args": { + "External id": 233114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198781.804, "dur": 1.093, + "args": { + "External id": 233115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198785.107, "dur": 0.812, + "args": { + "External id": 233116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198788.101, "dur": 1.240, + "args": { + "External id": 233117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198791.328, "dur": 0.991, + "args": { + "External id": 233118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198794.178, "dur": 0.980, + "args": { + "External id": 233119,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198797.095, "dur": 3.253, + "args": { + "External id": 233120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198802.082, "dur": 1.063, + "args": { + "External id": 233121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198805.161, "dur": 0.758, + "args": { + "External id": 233122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097198807.679, "dur": 0.717, + "args": { + "External id": 233123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097198825.912, "dur": 148.804, + "args": { + "External id": 233124,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097198841.759, "dur": 128.699, + "args": { + "External id": 233125,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097198858.518, "dur": 14.156, + "args": { + "External id": 233126,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097198876.526, "dur": 65.057, + "args": { + "External id": 233127,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097198879.033, "dur": 62.300, + "args": { + "External id": 233128,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097198882.763, "dur": 6.743, + "args": { + "External id": 233129,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097198891.043, "dur": 49.782, + "args": { + "External id": 233130,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2070552, "tid": 2107648, + "ts": 5327097199110.514, "dur": 589.430, + "args": { + "External id": 233131,"Record function id": 0, "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5327097199126.640, "dur": 559.981, + "args": { + "External id": 233132,"Record function id": 0, "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097199187.189, "dur": 5.296, + "args": { + "External id": 233133,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097199208.865, "dur": 33.854, + "args": { + "External id": 233134,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199213.447, "dur": 2.860, + "args": { + "External id": 233135,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199218.456, "dur": 2.377, + "args": { + "External id": 233136,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199221.696, "dur": 0.427, + "args": { + "External id": 233137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199224.698, "dur": 0.340, + "args": { + "External id": 233138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199228.436, "dur": 0.420, + "args": { + "External id": 233139,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199230.434, "dur": 0.748, + "args": { + "External id": 233140,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199233.572, "dur": 0.379, + "args": { + "External id": 233141,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199235.233, "dur": 0.611, + "args": { + "External id": 233142,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199236.612, "dur": 1.454, + "args": { + "External id": 233143,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097199252.919, "dur": 33.950, + "args": { + "External id": 233144,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097199316.870, "dur": 104.382, + "args": { + "External id": 233145,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097199326.213, "dur": 5.898, + "args": { + "External id": 233146,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097199336.893, "dur": 9.569, + "args": { + "External id": 233147,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097199340.800, "dur": 5.229, + "args": { + "External id": 233148,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199343.967, "dur": 0.709, + "args": { + "External id": 233149,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097199355.782, "dur": 23.069, + "args": { + "External id": 233150,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199357.566, "dur": 0.439, + "args": { + "External id": 233151,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199360.399, "dur": 0.732, + "args": { + "External id": 233152,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199361.983, "dur": 0.817, + "args": { + "External id": 233153,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199364.262, "dur": 0.999, + "args": { + "External id": 233154,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199366.258, "dur": 0.308, + "args": { + "External id": 233155,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199367.714, "dur": 0.324, + "args": { + "External id": 233156,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199369.464, "dur": 2.520, + "args": { + "External id": 233157,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199373.400, "dur": 0.547, + "args": { + "External id": 233158,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097199374.731, "dur": 0.603, + "args": { + "External id": 233159,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097199394.864, "dur": 19.368, + "args": { + "External id": 233160,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097199467.376, "dur": 112.332, + "args": { + "External id": 233161,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097199491.673, "dur": 84.554, + "args": { + "External id": 233162,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1737, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097199501.279, "dur": 70.978, + "args": { + "External id": 233163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097199593.728, "dur": 1.856, + "args": { + "External id": 233164,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1739, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097199714.877, "dur": 1610.094, + "args": { + "External id": 233165,"Sequence number": 959146, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1740 + } + }, + { + "ph": "f", "id": 30, "pid": 2070552, "tid": 2107648, "ts": 5327097199714.877, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097199824.301, "dur": 106.115, + "args": { + "External id": 233166,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097199990.079, "dur": 43.268, + "args": { + "External id": 233167,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200054.185, "dur": 51.664, + "args": { + "External id": 233168,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200114.895, "dur": 32.304, + "args": { + "External id": 233169,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200153.233, "dur": 44.785, + "args": { + "External id": 233170,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200206.368, "dur": 28.713, + "args": { + "External id": 233171,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200245.149, "dur": 42.583, + "args": { + "External id": 233172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097200311.140, "dur": 22.643, + "args": { + "External id": 233173,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097200350.326, "dur": 26.478, + "args": { + "External id": 233174,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097200399.881, "dur": 19.843, + "args": { + "External id": 233175,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097200431.963, "dur": 14.145, + "args": { + "External id": 233176,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200456.709, "dur": 29.170, + "args": { + "External id": 233177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200488.919, "dur": 32.556, + "args": { + "External id": 233178,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097200553.128, "dur": 212.323, + "args": { + "External id": 233179,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097200663.374, "dur": 6.613, + "args": { + "External id": 233180,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097200672.153, "dur": 4.567, + "args": { + "External id": 233181,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097200802.532, "dur": 25.560, + "args": { + "External id": 233182,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097200839.823, "dur": 14.329, + "args": { + "External id": 233183,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200863.122, "dur": 40.743, + "args": { + "External id": 233184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200909.087, "dur": 33.940, + "args": { + "External id": 233185,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200950.403, "dur": 21.908, + "args": { + "External id": 233186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097200995.108, "dur": 35.698, + "args": { + "External id": 233187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097201037.733, "dur": 21.255, + "args": { + "External id": 233188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097201065.095, "dur": 38.345, + "args": { + "External id": 233189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097201128.586, "dur": 28.344, + "args": { + "External id": 233190,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097201175.598, "dur": 23.811, + "args": { + "External id": 233191,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097201213.662, "dur": 20.690, + "args": { + "External id": 233192,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097201250.231, "dur": 15.607, + "args": { + "External id": 233193,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097201281.244, "dur": 14.924, + "args": { + "External id": 233194,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201367.164, "dur": 13.837, + "args": { + "External id": 233195,"Record function id": 0, "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201370.239, "dur": 9.740, + "args": { + "External id": 233196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201374.245, "dur": 4.972, + "args": { + "External id": 233197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201375.403, "dur": 3.719, + "args": { + "External id": 233198,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201384.665, "dur": 3.899, + "args": { + "External id": 233199,"Record function id": 0, "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201385.840, "dur": 2.274, + "args": { + "External id": 233200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201386.473, "dur": 1.157, + "args": { + "External id": 233201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201386.771, "dur": 0.784, + "args": { + "External id": 233202,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201391.887, "dur": 4.070, + "args": { + "External id": 233203,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201393.277, "dur": 2.261, + "args": { + "External id": 233204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201393.813, "dur": 1.311, + "args": { + "External id": 233205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201394.269, "dur": 0.769, + "args": { + "External id": 233206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201399.049, "dur": 3.755, + "args": { + "External id": 233207,"Record function id": 0, "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201400.195, "dur": 2.212, + "args": { + "External id": 233208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201400.734, "dur": 1.262, + "args": { + "External id": 233209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201401.277, "dur": 0.643, + "args": { + "External id": 233210,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201406.202, "dur": 4.113, + "args": { + "External id": 233211,"Record function id": 0, "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201407.410, "dur": 2.482, + "args": { + "External id": 233212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201408.099, "dur": 1.367, + "args": { + "External id": 233213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201408.445, "dur": 0.953, + "args": { + "External id": 233214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201413.635, "dur": 4.780, + "args": { + "External id": 233215,"Record function id": 0, "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201415.132, "dur": 2.863, + "args": { + "External id": 233216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201415.768, "dur": 1.680, + "args": { + "External id": 233217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201416.511, "dur": 0.861, + "args": { + "External id": 233218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201421.879, "dur": 6.172, + "args": { + "External id": 233219,"Record function id": 0, "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201422.867, "dur": 4.775, + "args": { + "External id": 233220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201423.564, "dur": 3.364, + "args": { + "External id": 233221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201424.215, "dur": 2.640, + "args": { + "External id": 233222,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201431.295, "dur": 4.438, + "args": { + "External id": 233223,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201432.748, "dur": 2.567, + "args": { + "External id": 233224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201433.406, "dur": 1.473, + "args": { + "External id": 233225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201433.995, "dur": 0.774, + "args": { + "External id": 233226,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201439.218, "dur": 4.270, + "args": { + "External id": 233227,"Record function id": 0, "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097201440.474, "dur": 2.611, + "args": { + "External id": 233228,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201440.939, "dur": 1.607, + "args": { + "External id": 233229,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097201441.717, "dur": 0.757, + "args": { + "External id": 233230,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097201447.297, "dur": 38509.445, + "args": { + "External id": 233231,"Record function id": 0, "Sequence number": 959145, "Fwd thread id": 1, "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097201448.721, "dur": 38499.635, + "args": { + "External id": 233232,"Sequence number": 959145, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1807 + } + }, + { + "ph": "f", "id": 31, "pid": 2070552, "tid": 2107648, "ts": 5327097201448.721, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5327097201475.379, "dur": 37.435, + "args": { + "External id": 233233,"Record function id": 0, "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5327097201519.818, "dur": 63.832, + "args": { + "External id": 233234,"Record function id": 0, "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5327097201589.310, "dur": 38351.586, + "args": { + "External id": 233235,"Record function id": 0, "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097201712.882, "dur": 7.308, + "args": { + "External id": 233236,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097201730.629, "dur": 4.969, + "args": { + "External id": 233237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097201750.892, "dur": 37264.127, + "args": { + "External id": 233238,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097201764.736, "dur": 37241.547, + "args": { + "External id": 233239,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097201807.058, "dur": 14.674, + "args": { + "External id": 233240,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097201827.813, "dur": 37124.678, + "args": { + "External id": 233241,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097201830.065, "dur": 37121.765, + "args": { + "External id": 233242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097201833.165, "dur": 5.114, + "args": { + "External id": 233243,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097201839.740, "dur": 37109.089, + "args": { + "External id": 233244,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097239103.159, "dur": 8.685, + "args": { + "External id": 233245,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097239105.942, "dur": 5.595, + "args": { + "External id": 233246,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097239146.999, "dur": 464.070, + "args": { + "External id": 233247,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097239170.652, "dur": 435.115, + "args": { + "External id": 233248,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1823, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097239181.476, "dur": 417.982, + "args": { + "External id": 233249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097239662.920, "dur": 3.086, + "args": { + "External id": 233250,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1825, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239732.819, "dur": 7.903, + "args": { + "External id": 233251,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239783.290, "dur": 1.176, + "args": { + "External id": 233252,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239799.473, "dur": 1.297, + "args": { + "External id": 233253,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239813.132, "dur": 1.006, + "args": { + "External id": 233254,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239826.152, "dur": 2.822, + "args": { + "External id": 233255,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239839.855, "dur": 0.888, + "args": { + "External id": 233256,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239851.327, "dur": 0.937, + "args": { + "External id": 233257,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239864.111, "dur": 1.768, + "args": { + "External id": 233258,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097239877.044, "dur": 2.946, + "args": { + "External id": 233259,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097239971.831, "dur": 2706.371, + "args": { + "External id": 233260,"Record function id": 0, "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5327097240006.455, "dur": 1010.958, + "args": { + "External id": 233261,"Record function id": 0, "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5327097240021.535, "dur": 313.378, + "args": { + "External id": 233262,"Record function id": 0, "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240107.874, "dur": 4.738, + "args": { + "External id": 233263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240116.098, "dur": 1.159, + "args": { + "External id": 233264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240119.600, "dur": 0.927, + "args": { + "External id": 233265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240122.652, "dur": 0.796, + "args": { + "External id": 233266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240125.380, "dur": 1.076, + "args": { + "External id": 233267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240128.037, "dur": 0.877, + "args": { + "External id": 233268,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240130.503, "dur": 3.555, + "args": { + "External id": 233269,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240136.010, "dur": 0.941, + "args": { + "External id": 233270,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240138.481, "dur": 1.100, + "args": { + "External id": 233271,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097240140.958, "dur": 0.723, + "args": { + "External id": 233272,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097240160.162, "dur": 146.193, + "args": { + "External id": 233273,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097240177.500, "dur": 124.877, + "args": { + "External id": 233274,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097240190.836, "dur": 12.731, + "args": { + "External id": 233275,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097240207.465, "dur": 66.404, + "args": { + "External id": 233276,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097240209.894, "dur": 63.656, + "args": { + "External id": 233277,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240214.589, "dur": 6.150, + "args": { + "External id": 233278,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097240222.182, "dur": 50.728, + "args": { + "External id": 233279,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2070552, "tid": 2107648, + "ts": 5327097240420.605, "dur": 588.595, + "args": { + "External id": 233280,"Record function id": 0, "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5327097240436.193, "dur": 556.826, + "args": { + "External id": 233281,"Record function id": 0, "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097240493.904, "dur": 4.738, + "args": { + "External id": 233282,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097240514.273, "dur": 27.375, + "args": { + "External id": 233283,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240518.711, "dur": 1.588, + "args": { + "External id": 233284,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240522.422, "dur": 2.873, + "args": { + "External id": 233285,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240526.883, "dur": 0.329, + "args": { + "External id": 233286,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240528.191, "dur": 0.355, + "args": { + "External id": 233287,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240530.573, "dur": 0.362, + "args": { + "External id": 233288,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240531.791, "dur": 0.243, + "args": { + "External id": 233289,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240533.320, "dur": 0.397, + "args": { + "External id": 233290,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240535.140, "dur": 0.625, + "args": { + "External id": 233291,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240537.001, "dur": 0.446, + "args": { + "External id": 233292,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097240551.329, "dur": 31.150, + "args": { + "External id": 233293,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097240611.036, "dur": 144.482, + "args": { + "External id": 233294,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097240656.349, "dur": 6.887, + "args": { + "External id": 233295,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097240669.800, "dur": 10.248, + "args": { + "External id": 233296,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097240674.042, "dur": 5.522, + "args": { + "External id": 233297,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240677.325, "dur": 0.605, + "args": { + "External id": 233298,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097240687.332, "dur": 25.175, + "args": { + "External id": 233299,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240689.760, "dur": 0.547, + "args": { + "External id": 233300,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240692.167, "dur": 0.349, + "args": { + "External id": 233301,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240693.887, "dur": 0.819, + "args": { + "External id": 233302,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240695.784, "dur": 1.050, + "args": { + "External id": 233303,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240698.894, "dur": 0.427, + "args": { + "External id": 233304,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240700.129, "dur": 0.347, + "args": { + "External id": 233305,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240703.553, "dur": 2.565, + "args": { + "External id": 233306,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240707.009, "dur": 0.331, + "args": { + "External id": 233307,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097240708.606, "dur": 0.367, + "args": { + "External id": 233308,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097240724.151, "dur": 23.518, + "args": { + "External id": 233309,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097240799.565, "dur": 113.393, + "args": { + "External id": 233310,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097240824.702, "dur": 84.784, + "args": { + "External id": 233311,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1886, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097240833.371, "dur": 70.801, + "args": { + "External id": 233312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097240928.603, "dur": 1.751, + "args": { + "External id": 233313,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1888, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097241024.884, "dur": 1630.326, + "args": { + "External id": 233314,"Sequence number": 959144, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1889 + } + }, + { + "ph": "f", "id": 32, "pid": 2070552, "tid": 2107648, "ts": 5327097241024.884, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241136.017, "dur": 105.903, + "args": { + "External id": 233315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097241282.456, "dur": 39.811, + "args": { + "External id": 233316,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241344.460, "dur": 49.424, + "args": { + "External id": 233317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241403.973, "dur": 32.207, + "args": { + "External id": 233318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241442.792, "dur": 45.447, + "args": { + "External id": 233319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241496.614, "dur": 28.134, + "args": { + "External id": 233320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241531.328, "dur": 42.546, + "args": { + "External id": 233321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097241597.445, "dur": 60.243, + "args": { + "External id": 233322,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097241679.513, "dur": 29.605, + "args": { + "External id": 233323,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097241730.938, "dur": 19.575, + "args": { + "External id": 233324,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097241764.190, "dur": 14.840, + "args": { + "External id": 233325,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241790.106, "dur": 34.931, + "args": { + "External id": 233326,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097241828.623, "dur": 34.576, + "args": { + "External id": 233327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097241889.838, "dur": 188.107, + "args": { + "External id": 233328,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097241964.173, "dur": 6.297, + "args": { + "External id": 233329,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097241972.342, "dur": 20.925, + "args": { + "External id": 233330,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097242115.251, "dur": 26.012, + "args": { + "External id": 233331,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097242153.005, "dur": 14.144, + "args": { + "External id": 233332,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097242178.295, "dur": 40.639, + "args": { + "External id": 233333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097242224.866, "dur": 35.271, + "args": { + "External id": 233334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097242267.172, "dur": 22.127, + "args": { + "External id": 233335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097242295.986, "dur": 29.688, + "args": { + "External id": 233336,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097242331.763, "dur": 21.693, + "args": { + "External id": 233337,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097242361.199, "dur": 45.929, + "args": { + "External id": 233338,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097242431.001, "dur": 23.205, + "args": { + "External id": 233339,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097242474.458, "dur": 24.569, + "args": { + "External id": 233340,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097242513.402, "dur": 20.016, + "args": { + "External id": 233341,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097242547.922, "dur": 15.738, + "args": { + "External id": 233342,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097242576.754, "dur": 17.401, + "args": { + "External id": 233343,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242700.485, "dur": 14.411, + "args": { + "External id": 233344,"Record function id": 0, "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242703.346, "dur": 10.589, + "args": { + "External id": 233345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242707.511, "dur": 5.493, + "args": { + "External id": 233346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242708.845, "dur": 4.033, + "args": { + "External id": 233347,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242718.611, "dur": 4.673, + "args": { + "External id": 233348,"Record function id": 0, "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242719.698, "dur": 3.143, + "args": { + "External id": 233349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242720.758, "dur": 1.625, + "args": { + "External id": 233350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242721.314, "dur": 0.980, + "args": { + "External id": 233351,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242726.510, "dur": 3.933, + "args": { + "External id": 233352,"Record function id": 0, "Ev Idx": 1927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242727.503, "dur": 2.541, + "args": { + "External id": 233353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242728.530, "dur": 1.080, + "args": { + "External id": 233354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242728.894, "dur": 0.643, + "args": { + "External id": 233355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242733.519, "dur": 4.444, + "args": { + "External id": 233356,"Record function id": 0, "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242734.670, "dur": 2.888, + "args": { + "External id": 233357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242735.238, "dur": 1.757, + "args": { + "External id": 233358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242736.044, "dur": 0.879, + "args": { + "External id": 233359,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242741.110, "dur": 6.168, + "args": { + "External id": 233360,"Record function id": 0, "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242742.460, "dur": 4.394, + "args": { + "External id": 233361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242742.919, "dur": 3.377, + "args": { + "External id": 233362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242743.728, "dur": 2.495, + "args": { + "External id": 233363,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242750.455, "dur": 3.976, + "args": { + "External id": 233364,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242751.354, "dur": 2.671, + "args": { + "External id": 233365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242752.102, "dur": 1.494, + "args": { + "External id": 233366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242752.749, "dur": 0.772, + "args": { + "External id": 233367,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242757.744, "dur": 3.366, + "args": { + "External id": 233368,"Record function id": 0, "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242758.655, "dur": 2.054, + "args": { + "External id": 233369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242759.113, "dur": 1.047, + "args": { + "External id": 233370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242759.472, "dur": 0.612, + "args": { + "External id": 233371,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242764.358, "dur": 4.797, + "args": { + "External id": 233372,"Record function id": 0, "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242765.551, "dur": 3.183, + "args": { + "External id": 233373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242766.428, "dur": 1.753, + "args": { + "External id": 233374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242767.439, "dur": 0.668, + "args": { + "External id": 233375,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242772.308, "dur": 3.921, + "args": { + "External id": 233376,"Record function id": 0, "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097242773.325, "dur": 2.493, + "args": { + "External id": 233377,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242774.001, "dur": 1.386, + "args": { + "External id": 233378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097242774.607, "dur": 0.712, + "args": { + "External id": 233379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097242780.530, "dur": 36354.463, + "args": { + "External id": 233380,"Record function id": 0, "Sequence number": 959143, "Fwd thread id": 1, "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097242781.986, "dur": 36344.896, + "args": { + "External id": 233381,"Sequence number": 959143, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1956 + } + }, + { + "ph": "f", "id": 33, "pid": 2070552, "tid": 2107648, "ts": 5327097242781.986, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5327097242811.566, "dur": 38.335, + "args": { + "External id": 233382,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5327097242857.449, "dur": 67.021, + "args": { + "External id": 233383,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5327097242930.278, "dur": 36188.844, + "args": { + "External id": 233384,"Record function id": 0, "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097243034.815, "dur": 7.664, + "args": { + "External id": 233385,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097243054.252, "dur": 4.877, + "args": { + "External id": 233386,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097243074.214, "dur": 35194.281, + "args": { + "External id": 233387,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097243088.186, "dur": 35171.483, + "args": { + "External id": 233388,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097243127.397, "dur": 13.766, + "args": { + "External id": 233389,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097243147.263, "dur": 35074.986, + "args": { + "External id": 233390,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097243150.415, "dur": 35071.112, + "args": { + "External id": 233391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097243153.544, "dur": 7.227, + "args": { + "External id": 233392,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097243162.285, "dur": 35055.706, + "args": { + "External id": 233393,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097278354.460, "dur": 8.749, + "args": { + "External id": 233394,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097278357.106, "dur": 5.836, + "args": { + "External id": 233395,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097278389.622, "dur": 426.492, + "args": { + "External id": 233396,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097278417.094, "dur": 393.884, + "args": { + "External id": 233397,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1972, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097278427.412, "dur": 377.042, + "args": { + "External id": 233398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097278837.119, "dur": 2.206, + "args": { + "External id": 233399,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1974, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097278899.744, "dur": 6.413, + "args": { + "External id": 233400,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097278948.704, "dur": 1.202, + "args": { + "External id": 233401,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097278965.014, "dur": 1.585, + "args": { + "External id": 233402,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097278992.716, "dur": 4.073, + "args": { + "External id": 233403,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279011.275, "dur": 1.153, + "args": { + "External id": 233404,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279023.224, "dur": 1.137, + "args": { + "External id": 233405,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279035.316, "dur": 0.912, + "args": { + "External id": 233406,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279046.909, "dur": 3.620, + "args": { + "External id": 233407,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279060.849, "dur": 0.830, + "args": { + "External id": 233408,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097279150.317, "dur": 2704.651, + "args": { + "External id": 233409,"Record function id": 0, "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5327097279169.135, "dur": 1009.433, + "args": { + "External id": 233410,"Record function id": 0, "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5327097279182.277, "dur": 304.600, + "args": { + "External id": 233411,"Record function id": 0, "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279265.430, "dur": 3.883, + "args": { + "External id": 233412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279272.982, "dur": 1.137, + "args": { + "External id": 233413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279276.228, "dur": 1.373, + "args": { + "External id": 233414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279279.452, "dur": 0.909, + "args": { + "External id": 233415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279282.476, "dur": 2.852, + "args": { + "External id": 233416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279287.066, "dur": 0.842, + "args": { + "External id": 233417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279289.746, "dur": 1.695, + "args": { + "External id": 233418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279292.942, "dur": 1.350, + "args": { + "External id": 233419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279295.891, "dur": 0.967, + "args": { + "External id": 233420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097279298.520, "dur": 0.654, + "args": { + "External id": 233421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097279317.120, "dur": 142.637, + "args": { + "External id": 233422,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097279332.618, "dur": 122.736, + "args": { + "External id": 233423,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097279345.783, "dur": 11.779, + "args": { + "External id": 233424,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097279361.629, "dur": 66.107, + "args": { + "External id": 233425,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097279363.954, "dur": 63.432, + "args": { + "External id": 233426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279367.536, "dur": 6.510, + "args": { + "External id": 233427,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097279375.574, "dur": 51.264, + "args": { + "External id": 233428,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2070552, "tid": 2107648, + "ts": 5327097279572.146, "dur": 599.077, + "args": { + "External id": 233429,"Record function id": 0, "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5327097279588.821, "dur": 569.817, + "args": { + "External id": 233430,"Record function id": 0, "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097279688.586, "dur": 8.023, + "args": { + "External id": 233431,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097279713.617, "dur": 29.888, + "args": { + "External id": 233432,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279718.355, "dur": 2.484, + "args": { + "External id": 233433,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279723.267, "dur": 0.555, + "args": { + "External id": 233434,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279724.872, "dur": 0.445, + "args": { + "External id": 233435,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279727.477, "dur": 0.356, + "args": { + "External id": 233436,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279729.209, "dur": 0.672, + "args": { + "External id": 233437,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279731.671, "dur": 0.307, + "args": { + "External id": 233438,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279733.532, "dur": 0.334, + "args": { + "External id": 233439,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279735.548, "dur": 2.771, + "args": { + "External id": 233440,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279739.108, "dur": 0.433, + "args": { + "External id": 233441,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097279753.308, "dur": 34.695, + "args": { + "External id": 233442,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097279820.115, "dur": 97.700, + "args": { + "External id": 233443,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097279830.208, "dur": 3.307, + "args": { + "External id": 233444,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097279838.142, "dur": 8.989, + "args": { + "External id": 233445,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097279842.035, "dur": 4.675, + "args": { + "External id": 233446,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279845.102, "dur": 0.461, + "args": { + "External id": 233447,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097279853.841, "dur": 24.729, + "args": { + "External id": 233448,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279856.296, "dur": 0.647, + "args": { + "External id": 233449,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279858.564, "dur": 0.785, + "args": { + "External id": 233450,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279860.652, "dur": 0.488, + "args": { + "External id": 233451,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279862.562, "dur": 0.343, + "args": { + "External id": 233452,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279864.899, "dur": 2.561, + "args": { + "External id": 233453,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279868.775, "dur": 0.386, + "args": { + "External id": 233454,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279870.990, "dur": 0.470, + "args": { + "External id": 233455,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279872.232, "dur": 0.548, + "args": { + "External id": 233456,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097279874.334, "dur": 0.845, + "args": { + "External id": 233457,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097279890.079, "dur": 20.146, + "args": { + "External id": 233458,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097279961.810, "dur": 130.872, + "args": { + "External id": 233459,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097280001.138, "dur": 87.895, + "args": { + "External id": 233460,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2035, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097280010.862, "dur": 74.222, + "args": { + "External id": 233461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097280108.084, "dur": 1.578, + "args": { + "External id": 233462,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2037, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097280185.559, "dur": 1646.626, + "args": { + "External id": 233463,"Sequence number": 959142, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2038 + } + }, + { + "ph": "f", "id": 34, "pid": 2070552, "tid": 2107648, "ts": 5327097280185.559, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097280295.366, "dur": 102.862, + "args": { + "External id": 233464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097280441.082, "dur": 38.836, + "args": { + "External id": 233465,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097280495.386, "dur": 48.343, + "args": { + "External id": 233466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097280552.762, "dur": 32.515, + "args": { + "External id": 233467,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097280594.111, "dur": 80.301, + "args": { + "External id": 233468,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097280685.085, "dur": 34.004, + "args": { + "External id": 233469,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097280726.999, "dur": 42.464, + "args": { + "External id": 233470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097280794.976, "dur": 23.974, + "args": { + "External id": 233471,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097280837.271, "dur": 29.013, + "args": { + "External id": 233472,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097280904.819, "dur": 20.678, + "args": { + "External id": 233473,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097280938.430, "dur": 14.297, + "args": { + "External id": 233474,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097280962.326, "dur": 48.294, + "args": { + "External id": 233475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097281015.714, "dur": 36.330, + "args": { + "External id": 233476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097281082.641, "dur": 170.013, + "args": { + "External id": 233477,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097281159.102, "dur": 5.627, + "args": { + "External id": 233478,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097281166.515, "dur": 2.867, + "args": { + "External id": 233479,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097281289.160, "dur": 26.780, + "args": { + "External id": 233480,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097281327.934, "dur": 14.158, + "args": { + "External id": 233481,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097281350.190, "dur": 33.806, + "args": { + "External id": 233482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097281390.376, "dur": 34.128, + "args": { + "External id": 233483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097281430.711, "dur": 21.587, + "args": { + "External id": 233484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097281459.178, "dur": 29.381, + "args": { + "External id": 233485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097281495.043, "dur": 21.989, + "args": { + "External id": 233486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097281523.611, "dur": 29.579, + "args": { + "External id": 233487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097281570.334, "dur": 22.529, + "args": { + "External id": 233488,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097281655.196, "dur": 38.006, + "args": { + "External id": 233489,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097281716.051, "dur": 17.544, + "args": { + "External id": 233490,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097281753.979, "dur": 15.267, + "args": { + "External id": 233491,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097281784.655, "dur": 16.296, + "args": { + "External id": 233492,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281879.601, "dur": 14.553, + "args": { + "External id": 233493,"Record function id": 0, "Ev Idx": 2068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281882.672, "dur": 10.454, + "args": { + "External id": 233494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281886.869, "dur": 5.397, + "args": { + "External id": 233495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281887.997, "dur": 4.189, + "args": { + "External id": 233496,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281897.826, "dur": 4.875, + "args": { + "External id": 233497,"Record function id": 0, "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281899.090, "dur": 3.185, + "args": { + "External id": 233498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281900.166, "dur": 1.544, + "args": { + "External id": 233499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281900.512, "dur": 1.093, + "args": { + "External id": 233500,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281905.948, "dur": 4.808, + "args": { + "External id": 233501,"Record function id": 0, "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281907.220, "dur": 3.145, + "args": { + "External id": 233502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281908.112, "dur": 1.545, + "args": { + "External id": 233503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281908.645, "dur": 0.921, + "args": { + "External id": 233504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281913.876, "dur": 4.371, + "args": { + "External id": 233505,"Record function id": 0, "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281914.993, "dur": 2.858, + "args": { + "External id": 233506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281915.811, "dur": 1.587, + "args": { + "External id": 233507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281916.603, "dur": 0.707, + "args": { + "External id": 233508,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281924.536, "dur": 6.787, + "args": { + "External id": 233509,"Record function id": 0, "Ev Idx": 2084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281926.137, "dur": 4.785, + "args": { + "External id": 233510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281926.665, "dur": 3.811, + "args": { + "External id": 233511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281927.738, "dur": 2.653, + "args": { + "External id": 233512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281934.303, "dur": 4.633, + "args": { + "External id": 233513,"Record function id": 0, "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281935.723, "dur": 2.826, + "args": { + "External id": 233514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281936.372, "dur": 1.499, + "args": { + "External id": 233515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281937.005, "dur": 0.780, + "args": { + "External id": 233516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281942.363, "dur": 4.130, + "args": { + "External id": 233517,"Record function id": 0, "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281943.369, "dur": 2.731, + "args": { + "External id": 233518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281944.020, "dur": 1.456, + "args": { + "External id": 233519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281944.741, "dur": 0.659, + "args": { + "External id": 233520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281949.480, "dur": 4.680, + "args": { + "External id": 233521,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281950.847, "dur": 2.920, + "args": { + "External id": 233522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281951.558, "dur": 1.553, + "args": { + "External id": 233523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281952.194, "dur": 0.843, + "args": { + "External id": 233524,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281957.810, "dur": 3.417, + "args": { + "External id": 233525,"Record function id": 0, "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097281958.828, "dur": 1.987, + "args": { + "External id": 233526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281959.287, "dur": 1.088, + "args": { + "External id": 233527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097281959.650, "dur": 0.657, + "args": { + "External id": 233528,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097281965.318, "dur": 36094.753, + "args": { + "External id": 233529,"Record function id": 0, "Sequence number": 959141, "Fwd thread id": 1, "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097281966.691, "dur": 36084.036, + "args": { + "External id": 233530,"Sequence number": 959141, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2105 + } + }, + { + "ph": "f", "id": 35, "pid": 2070552, "tid": 2107648, "ts": 5327097281966.691, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5327097282008.219, "dur": 36.363, + "args": { + "External id": 233531,"Record function id": 0, "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5327097282051.837, "dur": 67.098, + "args": { + "External id": 233532,"Record function id": 0, "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5327097282124.764, "dur": 35918.038, + "args": { + "External id": 233533,"Record function id": 0, "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097282210.089, "dur": 6.806, + "args": { + "External id": 233534,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097282227.475, "dur": 5.223, + "args": { + "External id": 233535,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097282246.832, "dur": 34987.739, + "args": { + "External id": 233536,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097282259.405, "dur": 34965.472, + "args": { + "External id": 233537,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097282301.740, "dur": 13.316, + "args": { + "External id": 233538,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097282321.075, "dur": 34862.754, + "args": { + "External id": 233539,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097282323.914, "dur": 34859.198, + "args": { + "External id": 233540,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097282327.865, "dur": 6.517, + "args": { + "External id": 233541,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097282335.906, "dur": 34843.866, + "args": { + "External id": 233542,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097317320.807, "dur": 9.208, + "args": { + "External id": 233543,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097317323.609, "dur": 6.094, + "args": { + "External id": 233544,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097317355.726, "dur": 387.524, + "args": { + "External id": 233545,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097317380.700, "dur": 357.438, + "args": { + "External id": 233546,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2121, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097317390.599, "dur": 341.938, + "args": { + "External id": 233547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097317764.198, "dur": 2.159, + "args": { + "External id": 233548,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2123, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317824.300, "dur": 6.391, + "args": { + "External id": 233549,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317872.923, "dur": 1.626, + "args": { + "External id": 233550,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317890.592, "dur": 1.406, + "args": { + "External id": 233551,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317903.422, "dur": 2.987, + "args": { + "External id": 233552,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317918.368, "dur": 0.744, + "args": { + "External id": 233553,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317930.309, "dur": 0.662, + "args": { + "External id": 233554,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317941.227, "dur": 0.836, + "args": { + "External id": 233555,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317954.367, "dur": 3.893, + "args": { + "External id": 233556,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097317968.011, "dur": 0.872, + "args": { + "External id": 233557,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097318075.815, "dur": 2671.845, + "args": { + "External id": 233558,"Record function id": 0, "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5327097318095.805, "dur": 978.874, + "args": { + "External id": 233559,"Record function id": 0, "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5327097318111.715, "dur": 301.831, + "args": { + "External id": 233560,"Record function id": 0, "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318194.499, "dur": 4.912, + "args": { + "External id": 233561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318202.485, "dur": 1.037, + "args": { + "External id": 233562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318205.880, "dur": 1.057, + "args": { + "External id": 233563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318208.561, "dur": 1.207, + "args": { + "External id": 233564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318211.333, "dur": 2.749, + "args": { + "External id": 233565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318215.804, "dur": 1.105, + "args": { + "External id": 233566,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318218.440, "dur": 1.530, + "args": { + "External id": 233567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318221.477, "dur": 0.943, + "args": { + "External id": 233568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318224.326, "dur": 0.855, + "args": { + "External id": 233569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097318226.827, "dur": 0.640, + "args": { + "External id": 233570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097318246.179, "dur": 141.189, + "args": { + "External id": 233571,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097318261.532, "dur": 121.736, + "args": { + "External id": 233572,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097318275.934, "dur": 12.529, + "args": { + "External id": 233573,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097318292.798, "dur": 62.876, + "args": { + "External id": 233574,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097318295.290, "dur": 60.067, + "args": { + "External id": 233575,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318298.721, "dur": 5.124, + "args": { + "External id": 233576,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097318305.297, "dur": 49.523, + "args": { + "External id": 233577,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2070552, "tid": 2107648, + "ts": 5327097318494.716, "dur": 572.796, + "args": { + "External id": 233578,"Record function id": 0, "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5327097318509.100, "dur": 545.240, + "args": { + "External id": 233579,"Record function id": 0, "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097318564.352, "dur": 6.010, + "args": { + "External id": 233580,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097318586.063, "dur": 29.724, + "args": { + "External id": 233581,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318590.406, "dur": 1.334, + "args": { + "External id": 233582,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318593.331, "dur": 0.965, + "args": { + "External id": 233583,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318596.282, "dur": 0.675, + "args": { + "External id": 233584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318598.501, "dur": 1.124, + "args": { + "External id": 233585,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318600.564, "dur": 0.670, + "args": { + "External id": 233586,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318602.653, "dur": 0.376, + "args": { + "External id": 233587,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318604.897, "dur": 0.667, + "args": { + "External id": 233588,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318607.278, "dur": 2.708, + "args": { + "External id": 233589,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318611.396, "dur": 0.389, + "args": { + "External id": 233590,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097318661.015, "dur": 35.465, + "args": { + "External id": 233591,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097318728.428, "dur": 96.626, + "args": { + "External id": 233592,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097318738.485, "dur": 4.121, + "args": { + "External id": 233593,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097318747.632, "dur": 10.320, + "args": { + "External id": 233594,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097318751.439, "dur": 6.089, + "args": { + "External id": 233595,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318754.992, "dur": 1.106, + "args": { + "External id": 233596,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097318764.851, "dur": 20.170, + "args": { + "External id": 233597,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318766.569, "dur": 0.696, + "args": { + "External id": 233598,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318768.748, "dur": 0.429, + "args": { + "External id": 233599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318769.961, "dur": 0.818, + "args": { + "External id": 233600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318772.106, "dur": 0.280, + "args": { + "External id": 233601,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318773.118, "dur": 2.349, + "args": { + "External id": 233602,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318776.465, "dur": 0.427, + "args": { + "External id": 233603,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318777.919, "dur": 0.471, + "args": { + "External id": 233604,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318778.974, "dur": 0.621, + "args": { + "External id": 233605,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097318780.494, "dur": 0.529, + "args": { + "External id": 233606,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097318794.930, "dur": 22.691, + "args": { + "External id": 233607,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097318867.829, "dur": 104.988, + "args": { + "External id": 233608,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097318888.334, "dur": 81.397, + "args": { + "External id": 233609,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2184, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097318896.667, "dur": 68.848, + "args": { + "External id": 233610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097319001.152, "dur": 2.612, + "args": { + "External id": 233611,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2186, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097319082.947, "dur": 1639.138, + "args": { + "External id": 233612,"Sequence number": 959140, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2187 + } + }, + { + "ph": "f", "id": 36, "pid": 2070552, "tid": 2107648, "ts": 5327097319082.947, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319192.340, "dur": 105.489, + "args": { + "External id": 233613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097319338.005, "dur": 38.819, + "args": { + "External id": 233614,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319393.417, "dur": 49.287, + "args": { + "External id": 233615,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319452.724, "dur": 31.545, + "args": { + "External id": 233616,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319492.154, "dur": 45.546, + "args": { + "External id": 233617,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319544.751, "dur": 27.097, + "args": { + "External id": 233618,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319579.376, "dur": 94.638, + "args": { + "External id": 233619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097319704.492, "dur": 27.055, + "args": { + "External id": 233620,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097319749.910, "dur": 28.230, + "args": { + "External id": 233621,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097319798.827, "dur": 20.062, + "args": { + "External id": 233622,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097319830.064, "dur": 14.866, + "args": { + "External id": 233623,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319853.242, "dur": 35.819, + "args": { + "External id": 233624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097319892.201, "dur": 33.009, + "args": { + "External id": 233625,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097319954.963, "dur": 186.633, + "args": { + "External id": 233626,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097320045.677, "dur": 6.797, + "args": { + "External id": 233627,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097320054.434, "dur": 3.115, + "args": { + "External id": 233628,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097320177.136, "dur": 26.073, + "args": { + "External id": 233629,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097320215.960, "dur": 13.902, + "args": { + "External id": 233630,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097320237.977, "dur": 39.085, + "args": { + "External id": 233631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097320282.541, "dur": 34.886, + "args": { + "External id": 233632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097320323.979, "dur": 23.249, + "args": { + "External id": 233633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097320352.958, "dur": 31.650, + "args": { + "External id": 233634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097320389.859, "dur": 22.035, + "args": { + "External id": 233635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097320418.447, "dur": 29.131, + "args": { + "External id": 233636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097320462.444, "dur": 21.238, + "args": { + "External id": 233637,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097320499.055, "dur": 40.210, + "args": { + "External id": 233638,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097320559.443, "dur": 20.169, + "args": { + "External id": 233639,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097320593.260, "dur": 15.638, + "args": { + "External id": 233640,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097320667.384, "dur": 20.369, + "args": { + "External id": 233641,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320769.382, "dur": 18.021, + "args": { + "External id": 233642,"Record function id": 0, "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320772.694, "dur": 13.647, + "args": { + "External id": 233643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320780.160, "dur": 5.177, + "args": { + "External id": 233644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320781.335, "dur": 3.896, + "args": { + "External id": 233645,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320791.103, "dur": 4.559, + "args": { + "External id": 233646,"Record function id": 0, "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320792.437, "dur": 2.777, + "args": { + "External id": 233647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320793.359, "dur": 1.330, + "args": { + "External id": 233648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320793.875, "dur": 0.737, + "args": { + "External id": 233649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320798.890, "dur": 4.520, + "args": { + "External id": 233650,"Record function id": 0, "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320800.552, "dur": 2.451, + "args": { + "External id": 233651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320801.203, "dur": 1.394, + "args": { + "External id": 233652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320801.652, "dur": 0.860, + "args": { + "External id": 233653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320806.619, "dur": 6.107, + "args": { + "External id": 233654,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320808.125, "dur": 4.184, + "args": { + "External id": 233655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320808.898, "dur": 2.848, + "args": { + "External id": 233656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320809.254, "dur": 2.422, + "args": { + "External id": 233657,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320815.783, "dur": 4.519, + "args": { + "External id": 233658,"Record function id": 0, "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320817.129, "dur": 2.758, + "args": { + "External id": 233659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320817.594, "dur": 1.874, + "args": { + "External id": 233660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320818.248, "dur": 1.125, + "args": { + "External id": 233661,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320823.299, "dur": 3.869, + "args": { + "External id": 233662,"Record function id": 0, "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320824.522, "dur": 2.233, + "args": { + "External id": 233663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320825.322, "dur": 1.034, + "args": { + "External id": 233664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320825.677, "dur": 0.605, + "args": { + "External id": 233665,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320830.196, "dur": 12.229, + "args": { + "External id": 233666,"Record function id": 0, "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320831.363, "dur": 10.619, + "args": { + "External id": 233667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320840.185, "dur": 1.280, + "args": { + "External id": 233668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320840.468, "dur": 0.922, + "args": { + "External id": 233669,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320845.607, "dur": 4.111, + "args": { + "External id": 233670,"Record function id": 0, "Ev Idx": 2245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320846.677, "dur": 2.612, + "args": { + "External id": 233671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320847.405, "dur": 1.241, + "args": { + "External id": 233672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320848.122, "dur": 0.451, + "args": { + "External id": 233673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320852.794, "dur": 4.284, + "args": { + "External id": 233674,"Record function id": 0, "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097320854.074, "dur": 2.595, + "args": { + "External id": 233675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320854.732, "dur": 1.295, + "args": { + "External id": 233676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097320855.092, "dur": 0.862, + "args": { + "External id": 233677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097320860.838, "dur": 36570.021, + "args": { + "External id": 233678,"Record function id": 0, "Sequence number": 959139, "Fwd thread id": 1, "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097320862.418, "dur": 36560.374, + "args": { + "External id": 233679,"Sequence number": 959139, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2254 + } + }, + { + "ph": "f", "id": 37, "pid": 2070552, "tid": 2107648, "ts": 5327097320862.418, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5327097320889.717, "dur": 41.235, + "args": { + "External id": 233680,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5327097320938.604, "dur": 89.192, + "args": { + "External id": 233681,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5327097321034.969, "dur": 36380.729, + "args": { + "External id": 233682,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097321124.622, "dur": 7.428, + "args": { + "External id": 233683,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097321141.888, "dur": 4.919, + "args": { + "External id": 233684,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097321160.870, "dur": 35375.991, + "args": { + "External id": 233685,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097321173.811, "dur": 35354.682, + "args": { + "External id": 233686,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097321211.869, "dur": 16.010, + "args": { + "External id": 233687,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097321233.749, "dur": 35257.217, + "args": { + "External id": 233688,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097321236.191, "dur": 35254.103, + "args": { + "External id": 233689,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097321240.281, "dur": 4.841, + "args": { + "External id": 233690,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097321246.626, "dur": 35240.351, + "args": { + "External id": 233691,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097356645.593, "dur": 9.383, + "args": { + "External id": 233692,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097356648.107, "dur": 6.325, + "args": { + "External id": 233693,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097356682.709, "dur": 448.497, + "args": { + "External id": 233694,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097356705.276, "dur": 420.883, + "args": { + "External id": 233695,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2270, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097356715.168, "dur": 405.199, + "args": { + "External id": 233696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097357150.456, "dur": 2.098, + "args": { + "External id": 233697,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2272, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357215.868, "dur": 6.730, + "args": { + "External id": 233698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357264.465, "dur": 1.505, + "args": { + "External id": 233699,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357282.014, "dur": 1.428, + "args": { + "External id": 233700,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357296.800, "dur": 0.991, + "args": { + "External id": 233701,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357309.013, "dur": 0.993, + "args": { + "External id": 233702,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357319.998, "dur": 1.010, + "args": { + "External id": 233703,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357331.122, "dur": 1.040, + "args": { + "External id": 233704,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357344.536, "dur": 1.894, + "args": { + "External id": 233705,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357355.618, "dur": 0.827, + "args": { + "External id": 233706,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097357444.969, "dur": 2694.631, + "args": { + "External id": 233707,"Record function id": 0, "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5327097357464.416, "dur": 981.630, + "args": { + "External id": 233708,"Record function id": 0, "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5327097357478.248, "dur": 350.041, + "args": { + "External id": 233709,"Record function id": 0, "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357559.215, "dur": 4.325, + "args": { + "External id": 233710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357566.847, "dur": 0.966, + "args": { + "External id": 233711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357569.664, "dur": 1.144, + "args": { + "External id": 233712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357572.640, "dur": 2.167, + "args": { + "External id": 233713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357576.085, "dur": 0.744, + "args": { + "External id": 233714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357577.977, "dur": 0.781, + "args": { + "External id": 233715,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357580.404, "dur": 1.407, + "args": { + "External id": 233716,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357583.349, "dur": 0.669, + "args": { + "External id": 233717,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357585.407, "dur": 0.965, + "args": { + "External id": 233718,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097357587.686, "dur": 0.678, + "args": { + "External id": 233719,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097357606.333, "dur": 190.330, + "args": { + "External id": 233720,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097357658.438, "dur": 133.630, + "args": { + "External id": 233721,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097357673.306, "dur": 13.072, + "args": { + "External id": 233722,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097357690.555, "dur": 72.579, + "args": { + "External id": 233723,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097357693.019, "dur": 69.827, + "args": { + "External id": 233724,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097357696.678, "dur": 6.912, + "args": { + "External id": 233725,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097357710.587, "dur": 51.766, + "args": { + "External id": 233726,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2070552, "tid": 2107648, + "ts": 5327097357917.210, "dur": 520.707, + "args": { + "External id": 233727,"Record function id": 0, "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5327097357935.128, "dur": 491.239, + "args": { + "External id": 233728,"Record function id": 0, "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097358010.412, "dur": 6.291, + "args": { + "External id": 233729,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097358033.338, "dur": 24.770, + "args": { + "External id": 233730,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358037.535, "dur": 1.492, + "args": { + "External id": 233731,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358041.042, "dur": 0.694, + "args": { + "External id": 233732,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358042.790, "dur": 0.932, + "args": { + "External id": 233733,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358044.681, "dur": 0.564, + "args": { + "External id": 233734,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358046.320, "dur": 0.783, + "args": { + "External id": 233735,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358047.885, "dur": 0.394, + "args": { + "External id": 233736,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358049.360, "dur": 1.919, + "args": { + "External id": 233737,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358051.956, "dur": 0.358, + "args": { + "External id": 233738,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358053.430, "dur": 0.363, + "args": { + "External id": 233739,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097358068.408, "dur": 34.415, + "args": { + "External id": 233740,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097358132.182, "dur": 86.901, + "args": { + "External id": 233741,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097358141.914, "dur": 2.833, + "args": { + "External id": 233742,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097358149.374, "dur": 9.089, + "args": { + "External id": 233743,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097358153.755, "dur": 4.308, + "args": { + "External id": 233744,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358156.369, "dur": 0.463, + "args": { + "External id": 233745,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097358164.902, "dur": 18.381, + "args": { + "External id": 233746,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358166.578, "dur": 0.617, + "args": { + "External id": 233747,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358168.144, "dur": 0.594, + "args": { + "External id": 233748,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358170.115, "dur": 0.534, + "args": { + "External id": 233749,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358171.519, "dur": 1.772, + "args": { + "External id": 233750,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358174.412, "dur": 0.341, + "args": { + "External id": 233751,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358175.535, "dur": 0.547, + "args": { + "External id": 233752,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358177.202, "dur": 0.387, + "args": { + "External id": 233753,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358178.538, "dur": 0.153, + "args": { + "External id": 233754,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097358179.546, "dur": 0.332, + "args": { + "External id": 233755,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097358192.331, "dur": 19.379, + "args": { + "External id": 233756,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097358261.594, "dur": 103.184, + "args": { + "External id": 233757,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097358281.170, "dur": 80.593, + "args": { + "External id": 233758,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2333, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097358290.045, "dur": 67.217, + "args": { + "External id": 233759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097358377.982, "dur": 1.500, + "args": { + "External id": 233760,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2335, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097358452.913, "dur": 1664.910, + "args": { + "External id": 233761,"Sequence number": 959138, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2336 + } + }, + { + "ph": "f", "id": 38, "pid": 2070552, "tid": 2107648, "ts": 5327097358452.913, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097358561.192, "dur": 146.042, + "args": { + "External id": 233762,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097358750.289, "dur": 40.749, + "args": { + "External id": 233763,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097358808.758, "dur": 54.378, + "args": { + "External id": 233764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097358872.981, "dur": 33.628, + "args": { + "External id": 233765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097358914.656, "dur": 46.152, + "args": { + "External id": 233766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097358967.544, "dur": 46.523, + "args": { + "External id": 233767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359025.255, "dur": 45.386, + "args": { + "External id": 233768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097359092.021, "dur": 35.078, + "args": { + "External id": 233769,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097359145.976, "dur": 27.464, + "args": { + "External id": 233770,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097359192.313, "dur": 19.744, + "args": { + "External id": 233771,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097359222.270, "dur": 15.457, + "args": { + "External id": 233772,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359245.169, "dur": 29.325, + "args": { + "External id": 233773,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359277.358, "dur": 32.567, + "args": { + "External id": 233774,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097359337.208, "dur": 166.419, + "args": { + "External id": 233775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097359412.666, "dur": 5.506, + "args": { + "External id": 233776,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097359420.094, "dur": 2.691, + "args": { + "External id": 233777,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097359536.463, "dur": 27.364, + "args": { + "External id": 233778,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097359574.509, "dur": 14.819, + "args": { + "External id": 233779,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359597.389, "dur": 82.748, + "args": { + "External id": 233780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359688.637, "dur": 40.609, + "args": { + "External id": 233781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359739.106, "dur": 22.414, + "args": { + "External id": 233782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359765.763, "dur": 30.719, + "args": { + "External id": 233783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359801.964, "dur": 22.131, + "args": { + "External id": 233784,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097359831.347, "dur": 29.034, + "args": { + "External id": 233785,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097359879.032, "dur": 36.797, + "args": { + "External id": 233786,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097359940.216, "dur": 28.768, + "args": { + "External id": 233787,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097360001.199, "dur": 20.055, + "args": { + "External id": 233788,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097360042.388, "dur": 16.059, + "args": { + "External id": 233789,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097360070.427, "dur": 16.330, + "args": { + "External id": 233790,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360161.356, "dur": 14.439, + "args": { + "External id": 233791,"Record function id": 0, "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360164.389, "dur": 10.396, + "args": { + "External id": 233792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360168.518, "dur": 5.426, + "args": { + "External id": 233793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360169.642, "dur": 4.178, + "args": { + "External id": 233794,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360179.723, "dur": 4.577, + "args": { + "External id": 233795,"Record function id": 0, "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360181.036, "dur": 2.851, + "args": { + "External id": 233796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360181.982, "dur": 1.376, + "args": { + "External id": 233797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360182.459, "dur": 0.825, + "args": { + "External id": 233798,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360187.444, "dur": 6.168, + "args": { + "External id": 233799,"Record function id": 0, "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360188.534, "dur": 4.659, + "args": { + "External id": 233800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360189.999, "dur": 2.791, + "args": { + "External id": 233801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360190.397, "dur": 2.291, + "args": { + "External id": 233802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360196.712, "dur": 4.611, + "args": { + "External id": 233803,"Record function id": 0, "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360198.023, "dur": 2.861, + "args": { + "External id": 233804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360198.940, "dur": 1.288, + "args": { + "External id": 233805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360199.210, "dur": 0.952, + "args": { + "External id": 233806,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360204.424, "dur": 4.379, + "args": { + "External id": 233807,"Record function id": 0, "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360205.748, "dur": 2.617, + "args": { + "External id": 233808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360206.904, "dur": 1.004, + "args": { + "External id": 233809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360207.227, "dur": 0.614, + "args": { + "External id": 233810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360211.842, "dur": 3.793, + "args": { + "External id": 233811,"Record function id": 0, "Ev Idx": 2386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360213.152, "dur": 2.068, + "args": { + "External id": 233812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360213.684, "dur": 1.152, + "args": { + "External id": 233813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360214.076, "dur": 0.684, + "args": { + "External id": 233814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360218.717, "dur": 4.898, + "args": { + "External id": 233815,"Record function id": 0, "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360219.764, "dur": 3.418, + "args": { + "External id": 233816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360220.787, "dur": 1.961, + "args": { + "External id": 233817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360221.769, "dur": 0.904, + "args": { + "External id": 233818,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360227.272, "dur": 3.694, + "args": { + "External id": 233819,"Record function id": 0, "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360228.365, "dur": 2.183, + "args": { + "External id": 233820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360229.175, "dur": 0.984, + "args": { + "External id": 233821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360229.534, "dur": 0.550, + "args": { + "External id": 233822,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360234.465, "dur": 4.536, + "args": { + "External id": 233823,"Record function id": 0, "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097360235.592, "dur": 2.957, + "args": { + "External id": 233824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360236.130, "dur": 1.658, + "args": { + "External id": 233825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097360237.092, "dur": 0.620, + "args": { + "External id": 233826,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097360242.790, "dur": 35962.538, + "args": { + "External id": 233827,"Record function id": 0, "Sequence number": 959137, "Fwd thread id": 1, "Ev Idx": 2402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097360244.198, "dur": 35952.793, + "args": { + "External id": 233828,"Sequence number": 959137, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2403 + } + }, + { + "ph": "f", "id": 39, "pid": 2070552, "tid": 2107648, "ts": 5327097360244.198, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5327097360270.718, "dur": 38.461, + "args": { + "External id": 233829,"Record function id": 0, "Ev Idx": 2404 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5327097360316.777, "dur": 69.547, + "args": { + "External id": 233830,"Record function id": 0, "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5327097360392.235, "dur": 35797.235, + "args": { + "External id": 233831,"Record function id": 0, "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097360481.731, "dur": 6.242, + "args": { + "External id": 233832,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097360496.927, "dur": 5.450, + "args": { + "External id": 233833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097360515.484, "dur": 34800.243, + "args": { + "External id": 233834,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097360528.055, "dur": 34778.666, + "args": { + "External id": 233835,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097360565.259, "dur": 13.509, + "args": { + "External id": 233836,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097360584.741, "dur": 34684.409, + "args": { + "External id": 233837,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097360587.288, "dur": 34681.096, + "args": { + "External id": 233838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097360590.988, "dur": 5.159, + "args": { + "External id": 233839,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097360597.444, "dur": 34667.270, + "args": { + "External id": 233840,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097395401.048, "dur": 7.791, + "args": { + "External id": 233841,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097395403.699, "dur": 4.785, + "args": { + "External id": 233842,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097395435.526, "dur": 446.422, + "args": { + "External id": 233843,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097395460.667, "dur": 415.880, + "args": { + "External id": 233844,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2419, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097395470.502, "dur": 400.615, + "args": { + "External id": 233845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097395903.501, "dur": 2.130, + "args": { + "External id": 233846,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2421, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097395965.724, "dur": 6.377, + "args": { + "External id": 233847,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396035.519, "dur": 1.941, + "args": { + "External id": 233848,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396053.408, "dur": 2.936, + "args": { + "External id": 233849,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396069.543, "dur": 1.007, + "args": { + "External id": 233850,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396081.404, "dur": 0.919, + "args": { + "External id": 233851,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396092.765, "dur": 0.893, + "args": { + "External id": 233852,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396103.953, "dur": 2.851, + "args": { + "External id": 233853,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396118.182, "dur": 1.772, + "args": { + "External id": 233854,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396130.001, "dur": 1.260, + "args": { + "External id": 233855,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097396220.313, "dur": 2653.760, + "args": { + "External id": 233856,"Record function id": 0, "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5327097396240.012, "dur": 1001.482, + "args": { + "External id": 233857,"Record function id": 0, "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5327097396255.072, "dur": 298.845, + "args": { + "External id": 233858,"Record function id": 0, "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396339.134, "dur": 3.978, + "args": { + "External id": 233859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396346.214, "dur": 1.002, + "args": { + "External id": 233860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396349.130, "dur": 1.989, + "args": { + "External id": 233861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396353.003, "dur": 0.863, + "args": { + "External id": 233862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396355.245, "dur": 1.159, + "args": { + "External id": 233863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396357.847, "dur": 0.744, + "args": { + "External id": 233864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396360.116, "dur": 1.423, + "args": { + "External id": 233865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396362.873, "dur": 0.873, + "args": { + "External id": 233866,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396365.286, "dur": 0.653, + "args": { + "External id": 233867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097396367.338, "dur": 0.756, + "args": { + "External id": 233868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097396386.476, "dur": 141.815, + "args": { + "External id": 233869,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097396401.577, "dur": 122.608, + "args": { + "External id": 233870,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097396414.710, "dur": 13.744, + "args": { + "External id": 233871,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097396432.456, "dur": 64.376, + "args": { + "External id": 233872,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097396434.855, "dur": 61.694, + "args": { + "External id": 233873,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396438.326, "dur": 6.539, + "args": { + "External id": 233874,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097396446.567, "dur": 49.344, + "args": { + "External id": 233875,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2070552, "tid": 2107648, + "ts": 5327097396678.348, "dur": 554.710, + "args": { + "External id": 233876,"Record function id": 0, "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5327097396696.702, "dur": 523.299, + "args": { + "External id": 233877,"Record function id": 0, "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097396760.595, "dur": 5.904, + "args": { + "External id": 233878,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097396782.577, "dur": 26.442, + "args": { + "External id": 233879,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396787.245, "dur": 1.749, + "args": { + "External id": 233880,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396790.864, "dur": 0.314, + "args": { + "External id": 233881,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396792.208, "dur": 0.522, + "args": { + "External id": 233882,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396794.380, "dur": 0.677, + "args": { + "External id": 233883,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396796.249, "dur": 0.944, + "args": { + "External id": 233884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396798.433, "dur": 2.061, + "args": { + "External id": 233885,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396801.507, "dur": 0.403, + "args": { + "External id": 233886,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396803.420, "dur": 0.373, + "args": { + "External id": 233887,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396804.609, "dur": 0.608, + "args": { + "External id": 233888,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097396818.850, "dur": 36.353, + "args": { + "External id": 233889,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097396887.723, "dur": 106.678, + "args": { + "External id": 233890,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097396897.237, "dur": 3.085, + "args": { + "External id": 233891,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097396905.014, "dur": 9.699, + "args": { + "External id": 233892,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097396909.569, "dur": 4.708, + "args": { + "External id": 233893,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396912.533, "dur": 0.680, + "args": { + "External id": 233894,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097396920.929, "dur": 21.196, + "args": { + "External id": 233895,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396922.526, "dur": 0.662, + "args": { + "External id": 233896,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396924.400, "dur": 0.629, + "args": { + "External id": 233897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396925.962, "dur": 2.210, + "args": { + "External id": 233898,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396929.540, "dur": 0.601, + "args": { + "External id": 233899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396931.298, "dur": 0.617, + "args": { + "External id": 233900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396933.439, "dur": 0.391, + "args": { + "External id": 233901,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396934.592, "dur": 0.480, + "args": { + "External id": 233902,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396936.643, "dur": 0.429, + "args": { + "External id": 233903,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097396938.505, "dur": 0.627, + "args": { + "External id": 233904,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097396952.439, "dur": 20.456, + "args": { + "External id": 233905,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097397040.326, "dur": 114.071, + "args": { + "External id": 233906,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097397065.929, "dur": 85.245, + "args": { + "External id": 233907,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2482, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097397074.832, "dur": 71.032, + "args": { + "External id": 233908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097397168.771, "dur": 1.873, + "args": { + "External id": 233909,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2484, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097397248.251, "dur": 1601.615, + "args": { + "External id": 233910,"Sequence number": 959136, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2485 + } + }, + { + "ph": "f", "id": 40, "pid": 2070552, "tid": 2107648, "ts": 5327097397248.251, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097397357.773, "dur": 101.448, + "args": { + "External id": 233911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097397498.084, "dur": 39.237, + "args": { + "External id": 233912,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097397553.122, "dur": 48.266, + "args": { + "External id": 233913,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097397611.587, "dur": 76.622, + "args": { + "External id": 233914,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097397700.601, "dur": 47.413, + "args": { + "External id": 233915,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097397754.949, "dur": 28.404, + "args": { + "External id": 233916,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097397791.149, "dur": 41.707, + "args": { + "External id": 233917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097397856.180, "dur": 25.153, + "args": { + "External id": 233918,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097397899.587, "dur": 28.427, + "args": { + "External id": 233919,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097397948.577, "dur": 19.613, + "args": { + "External id": 233920,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097397996.639, "dur": 19.583, + "args": { + "External id": 233921,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398025.486, "dur": 33.844, + "args": { + "External id": 233922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398062.453, "dur": 33.850, + "args": { + "External id": 233923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097398123.823, "dur": 163.743, + "args": { + "External id": 233924,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097398195.003, "dur": 7.282, + "args": { + "External id": 233925,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097398204.337, "dur": 3.440, + "args": { + "External id": 233926,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097398319.376, "dur": 28.310, + "args": { + "External id": 233927,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097398359.019, "dur": 13.633, + "args": { + "External id": 233928,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398379.416, "dur": 36.174, + "args": { + "External id": 233929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398421.002, "dur": 33.999, + "args": { + "External id": 233930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398461.878, "dur": 22.265, + "args": { + "External id": 233931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398490.496, "dur": 29.797, + "args": { + "External id": 233932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398525.771, "dur": 22.110, + "args": { + "External id": 233933,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097398553.751, "dur": 29.339, + "args": { + "External id": 233934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097398599.154, "dur": 22.373, + "args": { + "External id": 233935,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097398674.450, "dur": 27.923, + "args": { + "External id": 233936,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097398730.178, "dur": 24.264, + "args": { + "External id": 233937,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097398772.171, "dur": 15.637, + "args": { + "External id": 233938,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097398800.649, "dur": 16.585, + "args": { + "External id": 233939,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398899.750, "dur": 15.831, + "args": { + "External id": 233940,"Record function id": 0, "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398903.748, "dur": 10.877, + "args": { + "External id": 233941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398907.769, "dur": 5.980, + "args": { + "External id": 233942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398909.274, "dur": 4.387, + "args": { + "External id": 233943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398919.248, "dur": 5.050, + "args": { + "External id": 233944,"Record function id": 0, "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398921.027, "dur": 2.873, + "args": { + "External id": 233945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398921.805, "dur": 1.442, + "args": { + "External id": 233946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398922.430, "dur": 0.744, + "args": { + "External id": 233947,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398927.513, "dur": 4.993, + "args": { + "External id": 233948,"Record function id": 0, "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398928.837, "dur": 3.275, + "args": { + "External id": 233949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398929.707, "dur": 1.875, + "args": { + "External id": 233950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398930.152, "dur": 1.335, + "args": { + "External id": 233951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398935.656, "dur": 4.305, + "args": { + "External id": 233952,"Record function id": 0, "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398937.076, "dur": 2.490, + "args": { + "External id": 233953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398937.888, "dur": 1.259, + "args": { + "External id": 233954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398938.179, "dur": 0.879, + "args": { + "External id": 233955,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398942.974, "dur": 4.576, + "args": { + "External id": 233956,"Record function id": 0, "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398944.186, "dur": 2.953, + "args": { + "External id": 233957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398944.901, "dur": 1.804, + "args": { + "External id": 233958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398945.671, "dur": 0.958, + "args": { + "External id": 233959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398950.599, "dur": 6.354, + "args": { + "External id": 233960,"Record function id": 0, "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398953.816, "dur": 2.734, + "args": { + "External id": 233961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398954.519, "dur": 1.607, + "args": { + "External id": 233962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398955.520, "dur": 0.528, + "args": { + "External id": 233963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398960.265, "dur": 5.144, + "args": { + "External id": 233964,"Record function id": 0, "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398961.449, "dur": 3.553, + "args": { + "External id": 233965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398962.154, "dur": 2.433, + "args": { + "External id": 233966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398962.522, "dur": 1.978, + "args": { + "External id": 233967,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398968.425, "dur": 4.407, + "args": { + "External id": 233968,"Record function id": 0, "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398969.551, "dur": 2.874, + "args": { + "External id": 233969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398970.222, "dur": 1.795, + "args": { + "External id": 233970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398971.097, "dur": 0.847, + "args": { + "External id": 233971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398975.801, "dur": 22.728, + "args": { + "External id": 233972,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097398993.194, "dur": 4.544, + "args": { + "External id": 233973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398994.347, "dur": 2.579, + "args": { + "External id": 233974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097398995.376, "dur": 1.280, + "args": { + "External id": 233975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097399004.428, "dur": 36943.624, + "args": { + "External id": 233976,"Record function id": 0, "Sequence number": 959135, "Fwd thread id": 1, "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097399006.251, "dur": 36933.671, + "args": { + "External id": 233977,"Sequence number": 959135, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2552 + } + }, + { + "ph": "f", "id": 41, "pid": 2070552, "tid": 2107648, "ts": 5327097399006.251, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5327097399035.245, "dur": 37.133, + "args": { + "External id": 233978,"Record function id": 0, "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5327097399080.051, "dur": 66.197, + "args": { + "External id": 233979,"Record function id": 0, "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5327097399152.050, "dur": 36779.934, + "args": { + "External id": 233980,"Record function id": 0, "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097399235.412, "dur": 6.387, + "args": { + "External id": 233981,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097399250.704, "dur": 4.837, + "args": { + "External id": 233982,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097399269.131, "dur": 35832.405, + "args": { + "External id": 233983,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097399281.542, "dur": 35810.543, + "args": { + "External id": 233984,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097399329.656, "dur": 14.155, + "args": { + "External id": 233985,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097399349.691, "dur": 35702.760, + "args": { + "External id": 233986,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097399351.924, "dur": 35699.845, + "args": { + "External id": 233987,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097399356.024, "dur": 4.502, + "args": { + "External id": 233988,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097399362.078, "dur": 35685.973, + "args": { + "External id": 233989,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097435190.634, "dur": 8.585, + "args": { + "External id": 233990,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097435193.142, "dur": 5.665, + "args": { + "External id": 233991,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097435226.702, "dur": 388.852, + "args": { + "External id": 233992,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097435252.703, "dur": 358.140, + "args": { + "External id": 233993,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2568, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097435262.886, "dur": 342.592, + "args": { + "External id": 233994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097435665.273, "dur": 2.963, + "args": { + "External id": 233995,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2570, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435729.134, "dur": 6.602, + "args": { + "External id": 233996,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435777.300, "dur": 2.968, + "args": { + "External id": 233997,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435796.127, "dur": 1.236, + "args": { + "External id": 233998,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435809.619, "dur": 0.737, + "args": { + "External id": 233999,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435822.016, "dur": 0.575, + "args": { + "External id": 234000,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435832.691, "dur": 2.588, + "args": { + "External id": 234001,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435845.988, "dur": 0.968, + "args": { + "External id": 234002,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435857.661, "dur": 1.736, + "args": { + "External id": 234003,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097435869.992, "dur": 0.730, + "args": { + "External id": 234004,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097435963.311, "dur": 2624.658, + "args": { + "External id": 234005,"Record function id": 0, "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5327097435999.100, "dur": 966.623, + "args": { + "External id": 234006,"Record function id": 0, "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5327097436014.563, "dur": 305.148, + "args": { + "External id": 234007,"Record function id": 0, "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436100.636, "dur": 5.002, + "args": { + "External id": 234008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436109.071, "dur": 0.914, + "args": { + "External id": 234009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436112.265, "dur": 2.297, + "args": { + "External id": 234010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436116.534, "dur": 0.727, + "args": { + "External id": 234011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436118.728, "dur": 0.813, + "args": { + "External id": 234012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436121.046, "dur": 0.753, + "args": { + "External id": 234013,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436123.479, "dur": 1.553, + "args": { + "External id": 234014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436126.470, "dur": 0.916, + "args": { + "External id": 234015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436128.914, "dur": 0.748, + "args": { + "External id": 234016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097436131.359, "dur": 0.615, + "args": { + "External id": 234017,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097436149.267, "dur": 142.154, + "args": { + "External id": 234018,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097436164.935, "dur": 122.503, + "args": { + "External id": 234019,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097436177.965, "dur": 13.357, + "args": { + "External id": 234020,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097436195.720, "dur": 63.183, + "args": { + "External id": 234021,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097436197.922, "dur": 60.669, + "args": { + "External id": 234022,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436201.087, "dur": 4.885, + "args": { + "External id": 234023,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097436207.397, "dur": 50.659, + "args": { + "External id": 234024,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2070552, "tid": 2107648, + "ts": 5327097436408.108, "dur": 550.377, + "args": { + "External id": 234025,"Record function id": 0, "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5327097436423.756, "dur": 522.697, + "args": { + "External id": 234026,"Record function id": 0, "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097436483.491, "dur": 4.519, + "args": { + "External id": 234027,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097436503.573, "dur": 23.336, + "args": { + "External id": 234028,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436507.630, "dur": 1.527, + "args": { + "External id": 234029,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436510.304, "dur": 0.675, + "args": { + "External id": 234030,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436512.267, "dur": 0.323, + "args": { + "External id": 234031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436513.790, "dur": 0.432, + "args": { + "External id": 234032,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436515.225, "dur": 0.345, + "args": { + "External id": 234033,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436516.403, "dur": 0.426, + "args": { + "External id": 234034,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436518.060, "dur": 2.055, + "args": { + "External id": 234035,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436521.123, "dur": 0.765, + "args": { + "External id": 234036,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436523.047, "dur": 0.349, + "args": { + "External id": 234037,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097436536.699, "dur": 30.341, + "args": { + "External id": 234038,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097436595.589, "dur": 136.958, + "args": { + "External id": 234039,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097436604.797, "dur": 3.019, + "args": { + "External id": 234040,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097436613.846, "dur": 45.958, + "args": { + "External id": 234041,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097436617.871, "dur": 41.379, + "args": { + "External id": 234042,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436656.359, "dur": 1.112, + "args": { + "External id": 234043,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097436668.100, "dur": 20.739, + "args": { + "External id": 234044,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436669.791, "dur": 0.434, + "args": { + "External id": 234045,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436672.007, "dur": 0.400, + "args": { + "External id": 234046,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436673.880, "dur": 0.373, + "args": { + "External id": 234047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436675.394, "dur": 2.986, + "args": { + "External id": 234048,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436679.295, "dur": 0.565, + "args": { + "External id": 234049,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436681.044, "dur": 0.403, + "args": { + "External id": 234050,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436682.306, "dur": 0.332, + "args": { + "External id": 234051,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436683.863, "dur": 0.650, + "args": { + "External id": 234052,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097436685.270, "dur": 0.308, + "args": { + "External id": 234053,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097436700.126, "dur": 24.447, + "args": { + "External id": 234054,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097436776.468, "dur": 107.068, + "args": { + "External id": 234055,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097436799.347, "dur": 81.065, + "args": { + "External id": 234056,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2631, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097436808.193, "dur": 68.523, + "args": { + "External id": 234057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097436896.782, "dur": 1.793, + "args": { + "External id": 234058,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2633, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097436972.705, "dur": 1594.455, + "args": { + "External id": 234059,"Sequence number": 959134, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2634 + } + }, + { + "ph": "f", "id": 42, "pid": 2070552, "tid": 2107648, "ts": 5327097436972.705, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437099.059, "dur": 106.574, + "args": { + "External id": 234060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097437245.641, "dur": 41.482, + "args": { + "External id": 234061,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437302.695, "dur": 48.850, + "args": { + "External id": 234062,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437360.579, "dur": 33.009, + "args": { + "External id": 234063,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437399.688, "dur": 45.899, + "args": { + "External id": 234064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437453.980, "dur": 28.544, + "args": { + "External id": 234065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437490.516, "dur": 42.163, + "args": { + "External id": 234066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097437555.190, "dur": 23.858, + "args": { + "External id": 234067,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097437596.869, "dur": 66.030, + "args": { + "External id": 234068,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097437687.991, "dur": 21.583, + "args": { + "External id": 234069,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097437721.271, "dur": 16.328, + "args": { + "External id": 234070,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437747.965, "dur": 35.110, + "args": { + "External id": 234071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097437786.181, "dur": 34.164, + "args": { + "External id": 234072,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097437846.930, "dur": 184.883, + "args": { + "External id": 234073,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097437921.765, "dur": 5.762, + "args": { + "External id": 234074,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097437929.151, "dur": 2.896, + "args": { + "External id": 234075,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097438064.998, "dur": 28.478, + "args": { + "External id": 234076,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097438105.058, "dur": 14.454, + "args": { + "External id": 234077,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097438127.954, "dur": 39.144, + "args": { + "External id": 234078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097438174.354, "dur": 38.356, + "args": { + "External id": 234079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097438219.655, "dur": 22.172, + "args": { + "External id": 234080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097438245.921, "dur": 32.486, + "args": { + "External id": 234081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097438284.191, "dur": 21.588, + "args": { + "External id": 234082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097438313.637, "dur": 29.050, + "args": { + "External id": 234083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097438358.658, "dur": 21.283, + "args": { + "External id": 234084,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097438396.159, "dur": 25.374, + "args": { + "External id": 234085,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097438448.003, "dur": 25.423, + "args": { + "External id": 234086,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097438493.342, "dur": 15.422, + "args": { + "External id": 234087,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097438520.991, "dur": 15.696, + "args": { + "External id": 234088,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438610.413, "dur": 65.741, + "args": { + "External id": 234089,"Record function id": 0, "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438617.088, "dur": 57.225, + "args": { + "External id": 234090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438667.164, "dur": 5.828, + "args": { + "External id": 234091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438668.430, "dur": 4.251, + "args": { + "External id": 234092,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438682.521, "dur": 5.364, + "args": { + "External id": 234093,"Record function id": 0, "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438684.320, "dur": 3.118, + "args": { + "External id": 234094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438685.366, "dur": 1.547, + "args": { + "External id": 234095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438686.010, "dur": 0.826, + "args": { + "External id": 234096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438691.170, "dur": 4.202, + "args": { + "External id": 234097,"Record function id": 0, "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438692.409, "dur": 2.545, + "args": { + "External id": 234098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438693.236, "dur": 1.179, + "args": { + "External id": 234099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438693.645, "dur": 0.675, + "args": { + "External id": 234100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438698.718, "dur": 5.620, + "args": { + "External id": 234101,"Record function id": 0, "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438699.820, "dur": 4.123, + "args": { + "External id": 234102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438700.532, "dur": 2.920, + "args": { + "External id": 234103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438700.855, "dur": 2.513, + "args": { + "External id": 234104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438707.415, "dur": 4.377, + "args": { + "External id": 234105,"Record function id": 0, "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438708.954, "dur": 2.425, + "args": { + "External id": 234106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438709.414, "dur": 1.560, + "args": { + "External id": 234107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438709.832, "dur": 1.059, + "args": { + "External id": 234108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438714.865, "dur": 4.012, + "args": { + "External id": 234109,"Record function id": 0, "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438716.283, "dur": 2.180, + "args": { + "External id": 234110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438716.718, "dur": 1.190, + "args": { + "External id": 234111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438717.177, "dur": 0.659, + "args": { + "External id": 234112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438722.012, "dur": 3.846, + "args": { + "External id": 234113,"Record function id": 0, "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438723.386, "dur": 2.066, + "args": { + "External id": 234114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438723.853, "dur": 1.075, + "args": { + "External id": 234115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438724.210, "dur": 0.644, + "args": { + "External id": 234116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438728.885, "dur": 4.132, + "args": { + "External id": 234117,"Record function id": 0, "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438730.302, "dur": 2.318, + "args": { + "External id": 234118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438730.931, "dur": 1.145, + "args": { + "External id": 234119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438731.310, "dur": 0.700, + "args": { + "External id": 234120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438736.074, "dur": 4.294, + "args": { + "External id": 234121,"Record function id": 0, "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097438737.257, "dur": 2.721, + "args": { + "External id": 234122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438738.070, "dur": 1.380, + "args": { + "External id": 234123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097438738.574, "dur": 0.809, + "args": { + "External id": 234124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097438744.548, "dur": 36778.653, + "args": { + "External id": 234125,"Record function id": 0, "Sequence number": 959133, "Fwd thread id": 1, "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097438746.045, "dur": 36768.397, + "args": { + "External id": 234126,"Sequence number": 959133, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2701 + } + }, + { + "ph": "f", "id": 43, "pid": 2070552, "tid": 2107648, "ts": 5327097438746.045, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5327097438774.558, "dur": 37.957, + "args": { + "External id": 234127,"Record function id": 0, "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5327097438819.962, "dur": 72.602, + "args": { + "External id": 234128,"Record function id": 0, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5327097438899.251, "dur": 36608.091, + "args": { + "External id": 234129,"Record function id": 0, "Ev Idx": 2704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097439003.233, "dur": 7.907, + "args": { + "External id": 234130,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097439021.636, "dur": 4.934, + "args": { + "External id": 234131,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097439041.227, "dur": 35705.170, + "args": { + "External id": 234132,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097439055.699, "dur": 35682.149, + "args": { + "External id": 234133,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097439095.608, "dur": 14.034, + "args": { + "External id": 234134,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097439115.555, "dur": 35585.302, + "args": { + "External id": 234135,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097439117.878, "dur": 35582.292, + "args": { + "External id": 234136,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097439121.703, "dur": 5.340, + "args": { + "External id": 234137,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097439128.829, "dur": 35567.961, + "args": { + "External id": 234138,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097474832.052, "dur": 8.081, + "args": { + "External id": 234139,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097474834.600, "dur": 5.249, + "args": { + "External id": 234140,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097474866.844, "dur": 364.007, + "args": { + "External id": 234141,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097474891.703, "dur": 334.567, + "args": { + "External id": 234142,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2717, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097474901.874, "dur": 318.763, + "args": { + "External id": 234143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097475250.584, "dur": 2.336, + "args": { + "External id": 234144,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2719, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475310.126, "dur": 6.175, + "args": { + "External id": 234145,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475357.390, "dur": 2.890, + "args": { + "External id": 234146,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475375.161, "dur": 1.131, + "args": { + "External id": 234147,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475387.661, "dur": 0.782, + "args": { + "External id": 234148,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475399.925, "dur": 0.883, + "args": { + "External id": 234149,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475411.563, "dur": 2.194, + "args": { + "External id": 234150,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475424.475, "dur": 0.817, + "args": { + "External id": 234151,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475436.712, "dur": 1.544, + "args": { + "External id": 234152,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475448.372, "dur": 0.956, + "args": { + "External id": 234153,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097475536.713, "dur": 2725.299, + "args": { + "External id": 234154,"Record function id": 0, "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5327097475556.291, "dur": 1008.086, + "args": { + "External id": 234155,"Record function id": 0, "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5327097475571.393, "dur": 348.593, + "args": { + "External id": 234156,"Record function id": 0, "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475694.212, "dur": 4.659, + "args": { + "External id": 234157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475702.664, "dur": 0.704, + "args": { + "External id": 234158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475705.367, "dur": 2.924, + "args": { + "External id": 234159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475710.322, "dur": 0.540, + "args": { + "External id": 234160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475712.610, "dur": 0.614, + "args": { + "External id": 234161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475714.791, "dur": 0.628, + "args": { + "External id": 234162,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475717.283, "dur": 1.619, + "args": { + "External id": 234163,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475721.060, "dur": 0.630, + "args": { + "External id": 234164,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475723.558, "dur": 0.651, + "args": { + "External id": 234165,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097475726.044, "dur": 0.677, + "args": { + "External id": 234166,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097475745.942, "dur": 144.057, + "args": { + "External id": 234167,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097475761.147, "dur": 124.788, + "args": { + "External id": 234168,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097475773.783, "dur": 13.358, + "args": { + "External id": 234169,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097475791.304, "dur": 66.097, + "args": { + "External id": 234170,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097475793.774, "dur": 63.340, + "args": { + "External id": 234171,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097475797.015, "dur": 5.615, + "args": { + "External id": 234172,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097475804.391, "dur": 52.217, + "args": { + "External id": 234173,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2070552, "tid": 2107648, + "ts": 5327097476030.116, "dur": 526.111, + "args": { + "External id": 234174,"Record function id": 0, "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5327097476048.008, "dur": 496.966, + "args": { + "External id": 234175,"Record function id": 0, "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097476109.811, "dur": 5.386, + "args": { + "External id": 234176,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097476131.594, "dur": 29.995, + "args": { + "External id": 234177,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476136.132, "dur": 1.814, + "args": { + "External id": 234178,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476140.191, "dur": 0.731, + "args": { + "External id": 234179,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476142.571, "dur": 0.384, + "args": { + "External id": 234180,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476144.225, "dur": 0.467, + "args": { + "External id": 234181,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476146.224, "dur": 0.560, + "args": { + "External id": 234182,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476148.268, "dur": 0.675, + "args": { + "External id": 234183,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476150.610, "dur": 2.644, + "args": { + "External id": 234184,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476154.812, "dur": 0.264, + "args": { + "External id": 234185,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476156.903, "dur": 0.879, + "args": { + "External id": 234186,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097476172.330, "dur": 33.023, + "args": { + "External id": 234187,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097476233.708, "dur": 95.198, + "args": { + "External id": 234188,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097476243.248, "dur": 2.942, + "args": { + "External id": 234189,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097476251.136, "dur": 9.588, + "args": { + "External id": 234190,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097476255.422, "dur": 4.908, + "args": { + "External id": 234191,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476258.531, "dur": 0.621, + "args": { + "External id": 234192,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097476267.237, "dur": 24.203, + "args": { + "External id": 234193,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476269.321, "dur": 0.702, + "args": { + "External id": 234194,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476271.436, "dur": 0.374, + "args": { + "External id": 234195,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476273.205, "dur": 0.391, + "args": { + "External id": 234196,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476274.610, "dur": 2.212, + "args": { + "External id": 234197,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476279.087, "dur": 0.804, + "args": { + "External id": 234198,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476280.871, "dur": 0.405, + "args": { + "External id": 234199,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476283.202, "dur": 0.636, + "args": { + "External id": 234200,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476285.097, "dur": 0.672, + "args": { + "External id": 234201,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097476287.755, "dur": 0.607, + "args": { + "External id": 234202,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097476301.939, "dur": 19.830, + "args": { + "External id": 234203,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097476369.224, "dur": 111.515, + "args": { + "External id": 234204,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097476396.393, "dur": 81.071, + "args": { + "External id": 234205,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2780, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097476405.062, "dur": 68.241, + "args": { + "External id": 234206,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097476497.796, "dur": 1.794, + "args": { + "External id": 234207,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2782, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097476571.134, "dur": 1669.910, + "args": { + "External id": 234208,"Sequence number": 959132, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2783 + } + }, + { + "ph": "f", "id": 44, "pid": 2070552, "tid": 2107648, "ts": 5327097476571.134, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097476719.048, "dur": 106.613, + "args": { + "External id": 234209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097476868.444, "dur": 38.913, + "args": { + "External id": 234210,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097476923.549, "dur": 45.946, + "args": { + "External id": 234211,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097476995.311, "dur": 38.720, + "args": { + "External id": 234212,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477041.631, "dur": 47.273, + "args": { + "External id": 234213,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477098.822, "dur": 28.685, + "args": { + "External id": 234214,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477135.278, "dur": 42.708, + "args": { + "External id": 234215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097477201.520, "dur": 24.357, + "args": { + "External id": 234216,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097477246.735, "dur": 27.339, + "args": { + "External id": 234217,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097477294.837, "dur": 19.260, + "args": { + "External id": 234218,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097477325.674, "dur": 15.328, + "args": { + "External id": 234219,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477350.593, "dur": 28.669, + "args": { + "External id": 234220,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477382.128, "dur": 31.908, + "args": { + "External id": 234221,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097477440.773, "dur": 161.654, + "args": { + "External id": 234222,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097477512.519, "dur": 6.326, + "args": { + "External id": 234223,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097477520.660, "dur": 3.006, + "args": { + "External id": 234224,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097477684.282, "dur": 29.513, + "args": { + "External id": 234225,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097477727.314, "dur": 17.848, + "args": { + "External id": 234226,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477754.778, "dur": 47.181, + "args": { + "External id": 234227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477809.747, "dur": 37.122, + "args": { + "External id": 234228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477853.478, "dur": 22.309, + "args": { + "External id": 234229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477880.522, "dur": 29.349, + "args": { + "External id": 234230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477916.172, "dur": 22.318, + "args": { + "External id": 234231,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097477947.351, "dur": 62.174, + "args": { + "External id": 234232,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097478041.862, "dur": 24.889, + "args": { + "External id": 234233,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097478085.740, "dur": 30.380, + "args": { + "External id": 234234,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097478131.866, "dur": 17.576, + "args": { + "External id": 234235,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097478169.657, "dur": 15.166, + "args": { + "External id": 234236,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097478196.895, "dur": 16.073, + "args": { + "External id": 234237,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478283.530, "dur": 14.216, + "args": { + "External id": 234238,"Record function id": 0, "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478286.717, "dur": 10.019, + "args": { + "External id": 234239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478290.821, "dur": 5.096, + "args": { + "External id": 234240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478292.050, "dur": 3.734, + "args": { + "External id": 234241,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478301.341, "dur": 4.797, + "args": { + "External id": 234242,"Record function id": 0, "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478302.936, "dur": 2.713, + "args": { + "External id": 234243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478303.542, "dur": 1.517, + "args": { + "External id": 234244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478303.885, "dur": 1.097, + "args": { + "External id": 234245,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478309.285, "dur": 4.024, + "args": { + "External id": 234246,"Record function id": 0, "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478310.764, "dur": 2.127, + "args": { + "External id": 234247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478311.262, "dur": 1.194, + "args": { + "External id": 234248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478311.655, "dur": 0.702, + "args": { + "External id": 234249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478316.467, "dur": 6.593, + "args": { + "External id": 234250,"Record function id": 0, "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478317.867, "dur": 4.766, + "args": { + "External id": 234251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478318.405, "dur": 3.533, + "args": { + "External id": 234252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478318.728, "dur": 3.120, + "args": { + "External id": 234253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478326.157, "dur": 5.054, + "args": { + "External id": 234254,"Record function id": 0, "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478327.742, "dur": 3.041, + "args": { + "External id": 234255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478328.416, "dur": 1.955, + "args": { + "External id": 234256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478328.792, "dur": 1.488, + "args": { + "External id": 234257,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478334.253, "dur": 4.455, + "args": { + "External id": 234258,"Record function id": 0, "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478335.745, "dur": 2.537, + "args": { + "External id": 234259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478336.280, "dur": 1.557, + "args": { + "External id": 234260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478336.802, "dur": 0.967, + "args": { + "External id": 234261,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478341.835, "dur": 4.530, + "args": { + "External id": 234262,"Record function id": 0, "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478343.126, "dur": 2.797, + "args": { + "External id": 234263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478343.778, "dur": 1.423, + "args": { + "External id": 234264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478344.253, "dur": 0.876, + "args": { + "External id": 234265,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478349.342, "dur": 4.453, + "args": { + "External id": 234266,"Record function id": 0, "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478350.609, "dur": 2.771, + "args": { + "External id": 234267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478351.063, "dur": 1.635, + "args": { + "External id": 234268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478351.625, "dur": 0.998, + "args": { + "External id": 234269,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478356.784, "dur": 4.247, + "args": { + "External id": 234270,"Record function id": 0, "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097478358.473, "dur": 2.115, + "args": { + "External id": 234271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478358.986, "dur": 1.159, + "args": { + "External id": 234272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097478359.410, "dur": 0.659, + "args": { + "External id": 234273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097478364.651, "dur": 37229.792, + "args": { + "External id": 234274,"Record function id": 0, "Sequence number": 959131, "Fwd thread id": 1, "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097478366.140, "dur": 37220.626, + "args": { + "External id": 234275,"Sequence number": 959131, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2850 + } + }, + { + "ph": "f", "id": 45, "pid": 2070552, "tid": 2107648, "ts": 5327097478366.140, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5327097478397.183, "dur": 42.191, + "args": { + "External id": 234276,"Record function id": 0, "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5327097478447.084, "dur": 66.436, + "args": { + "External id": 234277,"Record function id": 0, "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5327097478519.823, "dur": 37060.112, + "args": { + "External id": 234278,"Record function id": 0, "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097478605.738, "dur": 6.415, + "args": { + "External id": 234279,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097478658.112, "dur": 5.136, + "args": { + "External id": 234280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097478678.098, "dur": 36171.991, + "args": { + "External id": 234281,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097478695.466, "dur": 36145.798, + "args": { + "External id": 234282,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097478737.583, "dur": 14.320, + "args": { + "External id": 234283,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097478757.935, "dur": 36040.022, + "args": { + "External id": 234284,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097478760.515, "dur": 36036.695, + "args": { + "External id": 234285,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097478764.332, "dur": 6.516, + "args": { + "External id": 234286,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097478772.446, "dur": 36021.193, + "args": { + "External id": 234287,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097514938.441, "dur": 10.870, + "args": { + "External id": 234288,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097514941.236, "dur": 7.712, + "args": { + "External id": 234289,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097514988.673, "dur": 311.332, + "args": { + "External id": 234290,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097515017.354, "dur": 277.870, + "args": { + "External id": 234291,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2866, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097515029.144, "dur": 260.572, + "args": { + "External id": 234292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097515323.021, "dur": 2.008, + "args": { + "External id": 234293,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2868, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515383.352, "dur": 5.995, + "args": { + "External id": 234294,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515430.646, "dur": 3.441, + "args": { + "External id": 234295,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515449.534, "dur": 1.510, + "args": { + "External id": 234296,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515463.750, "dur": 0.884, + "args": { + "External id": 234297,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515475.583, "dur": 0.706, + "args": { + "External id": 234298,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515486.516, "dur": 1.938, + "args": { + "External id": 234299,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515499.003, "dur": 0.665, + "args": { + "External id": 234300,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515510.986, "dur": 2.200, + "args": { + "External id": 234301,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515522.999, "dur": 1.308, + "args": { + "External id": 234302,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097515607.464, "dur": 2705.757, + "args": { + "External id": 234303,"Record function id": 0, "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5327097515659.456, "dur": 1010.797, + "args": { + "External id": 234304,"Record function id": 0, "Ev Idx": 2879 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5327097515673.650, "dur": 327.664, + "args": { + "External id": 234305,"Record function id": 0, "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515759.540, "dur": 4.500, + "args": { + "External id": 234306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515767.632, "dur": 0.900, + "args": { + "External id": 234307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515770.564, "dur": 3.042, + "args": { + "External id": 234308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515775.702, "dur": 0.946, + "args": { + "External id": 234309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515778.338, "dur": 1.082, + "args": { + "External id": 234310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515781.238, "dur": 0.465, + "args": { + "External id": 234311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515783.538, "dur": 1.644, + "args": { + "External id": 234312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515786.690, "dur": 0.690, + "args": { + "External id": 234313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515789.041, "dur": 0.761, + "args": { + "External id": 234314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097515791.669, "dur": 0.631, + "args": { + "External id": 234315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097515810.305, "dur": 145.912, + "args": { + "External id": 234316,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097515827.674, "dur": 124.015, + "args": { + "External id": 234317,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097515841.404, "dur": 14.463, + "args": { + "External id": 234318,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097515859.863, "dur": 64.534, + "args": { + "External id": 234319,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097515862.291, "dur": 61.799, + "args": { + "External id": 234320,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097515866.133, "dur": 6.210, + "args": { + "External id": 234321,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097515873.826, "dur": 49.456, + "args": { + "External id": 234322,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2070552, "tid": 2107648, + "ts": 5327097516088.046, "dur": 529.909, + "args": { + "External id": 234323,"Record function id": 0, "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5327097516106.787, "dur": 499.021, + "args": { + "External id": 234324,"Record function id": 0, "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097516167.351, "dur": 5.156, + "args": { + "External id": 234325,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097516188.701, "dur": 29.715, + "args": { + "External id": 234326,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516193.116, "dur": 1.712, + "args": { + "External id": 234327,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516196.521, "dur": 0.640, + "args": { + "External id": 234328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516199.280, "dur": 0.737, + "args": { + "External id": 234329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516201.844, "dur": 0.322, + "args": { + "External id": 234330,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516204.160, "dur": 0.616, + "args": { + "External id": 234331,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516206.094, "dur": 0.794, + "args": { + "External id": 234332,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516208.480, "dur": 2.335, + "args": { + "External id": 234333,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516211.984, "dur": 0.430, + "args": { + "External id": 234334,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516214.106, "dur": 0.287, + "args": { + "External id": 234335,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097516229.091, "dur": 36.038, + "args": { + "External id": 234336,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097516293.243, "dur": 97.910, + "args": { + "External id": 234337,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097516303.180, "dur": 3.087, + "args": { + "External id": 234338,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097516311.185, "dur": 9.935, + "args": { + "External id": 234339,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097516315.354, "dur": 5.335, + "args": { + "External id": 234340,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516318.818, "dur": 0.782, + "args": { + "External id": 234341,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097516327.740, "dur": 27.272, + "args": { + "External id": 234342,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516329.575, "dur": 0.865, + "args": { + "External id": 234343,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516332.034, "dur": 0.497, + "args": { + "External id": 234344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516337.513, "dur": 0.659, + "args": { + "External id": 234345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516339.591, "dur": 2.439, + "args": { + "External id": 234346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516343.403, "dur": 0.648, + "args": { + "External id": 234347,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516345.618, "dur": 0.398, + "args": { + "External id": 234348,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516347.058, "dur": 0.466, + "args": { + "External id": 234349,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516349.171, "dur": 1.012, + "args": { + "External id": 234350,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097516351.640, "dur": 0.503, + "args": { + "External id": 234351,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097516364.472, "dur": 19.359, + "args": { + "External id": 234352,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097516431.688, "dur": 109.400, + "args": { + "External id": 234353,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097516456.211, "dur": 81.601, + "args": { + "External id": 234354,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2929, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097516464.933, "dur": 68.726, + "args": { + "External id": 234355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097516557.370, "dur": 1.802, + "args": { + "External id": 234356,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2931, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097516678.782, "dur": 1613.406, + "args": { + "External id": 234357,"Sequence number": 959130, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2932 + } + }, + { + "ph": "f", "id": 46, "pid": 2070552, "tid": 2107648, "ts": 5327097516678.782, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097516789.946, "dur": 105.939, + "args": { + "External id": 234358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097516937.220, "dur": 54.677, + "args": { + "External id": 234359,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517011.780, "dur": 54.139, + "args": { + "External id": 234360,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517076.038, "dur": 33.019, + "args": { + "External id": 234361,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517115.312, "dur": 44.701, + "args": { + "External id": 234362,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517168.529, "dur": 27.363, + "args": { + "External id": 234363,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517203.229, "dur": 42.091, + "args": { + "External id": 234364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097517269.860, "dur": 24.289, + "args": { + "External id": 234365,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097517312.727, "dur": 28.332, + "args": { + "External id": 234366,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097517362.060, "dur": 18.636, + "args": { + "External id": 234367,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097517392.868, "dur": 14.727, + "args": { + "External id": 234368,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517418.630, "dur": 29.042, + "args": { + "External id": 234369,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517450.335, "dur": 32.011, + "args": { + "External id": 234370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097517510.805, "dur": 211.164, + "args": { + "External id": 234371,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097517587.844, "dur": 5.992, + "args": { + "External id": 234372,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097517595.537, "dur": 3.307, + "args": { + "External id": 234373,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097517757.130, "dur": 26.979, + "args": { + "External id": 234374,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097517796.240, "dur": 14.331, + "args": { + "External id": 234375,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517819.108, "dur": 41.058, + "args": { + "External id": 234376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517868.338, "dur": 34.055, + "args": { + "External id": 234377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517908.450, "dur": 22.530, + "args": { + "External id": 234378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517935.697, "dur": 30.127, + "args": { + "External id": 234379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097517970.971, "dur": 40.704, + "args": { + "External id": 234380,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097518022.254, "dur": 31.010, + "args": { + "External id": 234381,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097518084.581, "dur": 31.775, + "args": { + "External id": 234382,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097518137.916, "dur": 27.739, + "args": { + "External id": 234383,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097518181.152, "dur": 16.989, + "args": { + "External id": 234384,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097518214.661, "dur": 17.910, + "args": { + "External id": 234385,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097518246.657, "dur": 14.968, + "args": { + "External id": 234386,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518335.567, "dur": 14.292, + "args": { + "External id": 234387,"Record function id": 0, "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518338.577, "dur": 10.258, + "args": { + "External id": 234388,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518342.487, "dur": 5.447, + "args": { + "External id": 234389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518343.671, "dur": 4.162, + "args": { + "External id": 234390,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518353.743, "dur": 5.016, + "args": { + "External id": 234391,"Record function id": 0, "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518355.241, "dur": 3.036, + "args": { + "External id": 234392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518355.913, "dur": 1.724, + "args": { + "External id": 234393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518356.417, "dur": 1.150, + "args": { + "External id": 234394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518362.056, "dur": 4.799, + "args": { + "External id": 234395,"Record function id": 0, "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518363.703, "dur": 2.714, + "args": { + "External id": 234396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518364.211, "dur": 1.780, + "args": { + "External id": 234397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518364.823, "dur": 1.087, + "args": { + "External id": 234398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518370.044, "dur": 5.893, + "args": { + "External id": 234399,"Record function id": 0, "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518371.287, "dur": 4.236, + "args": { + "External id": 234400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518372.127, "dur": 2.978, + "args": { + "External id": 234401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518372.404, "dur": 2.628, + "args": { + "External id": 234402,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518379.012, "dur": 4.264, + "args": { + "External id": 234403,"Record function id": 0, "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518380.570, "dur": 2.287, + "args": { + "External id": 234404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518381.013, "dur": 1.391, + "args": { + "External id": 234405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518381.554, "dur": 0.770, + "args": { + "External id": 234406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518386.332, "dur": 4.221, + "args": { + "External id": 234407,"Record function id": 0, "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518387.850, "dur": 2.303, + "args": { + "External id": 234408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518388.331, "dur": 1.373, + "args": { + "External id": 234409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518388.638, "dur": 0.992, + "args": { + "External id": 234410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518393.660, "dur": 4.604, + "args": { + "External id": 234411,"Record function id": 0, "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518395.003, "dur": 2.825, + "args": { + "External id": 234412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518395.642, "dur": 1.649, + "args": { + "External id": 234413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518396.160, "dur": 1.065, + "args": { + "External id": 234414,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518401.302, "dur": 3.948, + "args": { + "External id": 234415,"Record function id": 0, "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518402.555, "dur": 2.281, + "args": { + "External id": 234416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518403.028, "dur": 1.239, + "args": { + "External id": 234417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518403.342, "dur": 0.849, + "args": { + "External id": 234418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518408.299, "dur": 4.278, + "args": { + "External id": 234419,"Record function id": 0, "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097518409.663, "dur": 2.469, + "args": { + "External id": 234420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518410.120, "dur": 1.566, + "args": { + "External id": 234421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097518410.705, "dur": 0.912, + "args": { + "External id": 234422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097518416.640, "dur": 39574.869, + "args": { + "External id": 234423,"Record function id": 0, "Sequence number": 959129, "Fwd thread id": 1, "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097518417.855, "dur": 39550.808, + "args": { + "External id": 234424,"Sequence number": 959129, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2999 + } + }, + { + "ph": "f", "id": 47, "pid": 2070552, "tid": 2107648, "ts": 5327097518417.855, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5327097518444.778, "dur": 37.477, + "args": { + "External id": 234425,"Record function id": 0, "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5327097518489.874, "dur": 63.654, + "args": { + "External id": 234426,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5327097518559.624, "dur": 39401.233, + "args": { + "External id": 234427,"Record function id": 0, "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097518689.324, "dur": 8.737, + "args": { + "External id": 234428,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097518712.126, "dur": 5.040, + "args": { + "External id": 234429,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097518732.606, "dur": 38264.254, + "args": { + "External id": 234430,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097518746.007, "dur": 38241.851, + "args": { + "External id": 234431,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097518786.133, "dur": 14.387, + "args": { + "External id": 234432,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097518806.861, "dur": 38132.273, + "args": { + "External id": 234433,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097518809.391, "dur": 38128.982, + "args": { + "External id": 234434,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097518813.106, "dur": 4.926, + "args": { + "External id": 234435,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097518820.063, "dur": 38115.019, + "args": { + "External id": 234436,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097557083.563, "dur": 8.381, + "args": { + "External id": 234437,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097557086.141, "dur": 5.483, + "args": { + "External id": 234438,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097557117.718, "dur": 562.429, + "args": { + "External id": 234439,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097557144.363, "dur": 530.469, + "args": { + "External id": 234440,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3015, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097557154.324, "dur": 514.229, + "args": { + "External id": 234441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097557702.151, "dur": 2.524, + "args": { + "External id": 234442,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3017, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557766.144, "dur": 6.441, + "args": { + "External id": 234443,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557817.880, "dur": 2.412, + "args": { + "External id": 234444,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557835.685, "dur": 0.851, + "args": { + "External id": 234445,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557848.028, "dur": 0.804, + "args": { + "External id": 234446,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557858.247, "dur": 0.795, + "args": { + "External id": 234447,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557867.676, "dur": 2.013, + "args": { + "External id": 234448,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557879.566, "dur": 1.137, + "args": { + "External id": 234449,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557890.917, "dur": 1.671, + "args": { + "External id": 234450,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097557901.776, "dur": 0.905, + "args": { + "External id": 234451,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097558008.026, "dur": 2647.969, + "args": { + "External id": 234452,"Record function id": 0, "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5327097558028.667, "dur": 994.544, + "args": { + "External id": 234453,"Record function id": 0, "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5327097558042.110, "dur": 301.469, + "args": { + "External id": 234454,"Record function id": 0, "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558125.176, "dur": 4.682, + "args": { + "External id": 234455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558133.094, "dur": 0.922, + "args": { + "External id": 234456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558135.845, "dur": 1.304, + "args": { + "External id": 234457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558139.439, "dur": 1.350, + "args": { + "External id": 234458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558142.754, "dur": 0.825, + "args": { + "External id": 234459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558144.998, "dur": 0.850, + "args": { + "External id": 234460,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558147.490, "dur": 1.860, + "args": { + "External id": 234461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558150.704, "dur": 0.867, + "args": { + "External id": 234462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558153.494, "dur": 1.175, + "args": { + "External id": 234463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097558156.341, "dur": 0.835, + "args": { + "External id": 234464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097558173.887, "dur": 142.867, + "args": { + "External id": 234465,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097558188.647, "dur": 123.677, + "args": { + "External id": 234466,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097558201.605, "dur": 12.327, + "args": { + "External id": 234467,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097558217.846, "dur": 66.291, + "args": { + "External id": 234468,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097558220.385, "dur": 63.401, + "args": { + "External id": 234469,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558224.141, "dur": 6.176, + "args": { + "External id": 234470,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097558231.839, "dur": 51.323, + "args": { + "External id": 234471,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2070552, "tid": 2107648, + "ts": 5327097558426.007, "dur": 589.780, + "args": { + "External id": 234472,"Record function id": 0, "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5327097558441.073, "dur": 561.739, + "args": { + "External id": 234473,"Record function id": 0, "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097558497.902, "dur": 4.393, + "args": { + "External id": 234474,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097558517.727, "dur": 28.146, + "args": { + "External id": 234475,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558521.930, "dur": 2.009, + "args": { + "External id": 234476,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558526.560, "dur": 0.880, + "args": { + "External id": 234477,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558528.835, "dur": 0.669, + "args": { + "External id": 234478,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558531.196, "dur": 0.957, + "args": { + "External id": 234479,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558533.051, "dur": 1.132, + "args": { + "External id": 234480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558536.179, "dur": 0.601, + "args": { + "External id": 234481,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558538.355, "dur": 0.669, + "args": { + "External id": 234482,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558539.960, "dur": 0.654, + "args": { + "External id": 234483,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558541.931, "dur": 0.611, + "args": { + "External id": 234484,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097558555.192, "dur": 30.234, + "args": { + "External id": 234485,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097558615.398, "dur": 154.958, + "args": { + "External id": 234486,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097558663.286, "dur": 4.673, + "args": { + "External id": 234487,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097558673.474, "dur": 10.237, + "args": { + "External id": 234488,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097558677.249, "dur": 6.036, + "args": { + "External id": 234489,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558680.864, "dur": 0.904, + "args": { + "External id": 234490,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097558691.199, "dur": 37.077, + "args": { + "External id": 234491,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558693.617, "dur": 0.685, + "args": { + "External id": 234492,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558695.485, "dur": 1.040, + "args": { + "External id": 234493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558697.918, "dur": 0.752, + "args": { + "External id": 234494,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558699.565, "dur": 0.679, + "args": { + "External id": 234495,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558701.498, "dur": 0.600, + "args": { + "External id": 234496,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558718.691, "dur": 0.519, + "args": { + "External id": 234497,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558720.469, "dur": 0.711, + "args": { + "External id": 234498,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558723.022, "dur": 0.604, + "args": { + "External id": 234499,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097558724.624, "dur": 0.594, + "args": { + "External id": 234500,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097558738.835, "dur": 23.516, + "args": { + "External id": 234501,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097558814.865, "dur": 108.608, + "args": { + "External id": 234502,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097558836.150, "dur": 83.941, + "args": { + "External id": 234503,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3078, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097558844.226, "dur": 69.806, + "args": { + "External id": 234504,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097558936.536, "dur": 1.718, + "args": { + "External id": 234505,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3080, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097559031.175, "dur": 1567.410, + "args": { + "External id": 234506,"Sequence number": 959128, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3081 + } + }, + { + "ph": "f", "id": 48, "pid": 2070552, "tid": 2107648, "ts": 5327097559031.175, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559142.259, "dur": 105.860, + "args": { + "External id": 234507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097559285.255, "dur": 41.042, + "args": { + "External id": 234508,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559342.922, "dur": 50.100, + "args": { + "External id": 234509,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559403.098, "dur": 33.355, + "args": { + "External id": 234510,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559442.445, "dur": 46.040, + "args": { + "External id": 234511,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559494.710, "dur": 28.443, + "args": { + "External id": 234512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559530.208, "dur": 42.277, + "args": { + "External id": 234513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097559593.576, "dur": 24.646, + "args": { + "External id": 234514,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097559676.245, "dur": 30.494, + "args": { + "External id": 234515,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097559728.449, "dur": 19.214, + "args": { + "External id": 234516,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097559760.034, "dur": 14.899, + "args": { + "External id": 234517,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559783.716, "dur": 35.723, + "args": { + "External id": 234518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097559822.277, "dur": 35.128, + "args": { + "External id": 234519,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097559885.406, "dur": 183.649, + "args": { + "External id": 234520,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097559958.160, "dur": 6.649, + "args": { + "External id": 234521,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097559966.510, "dur": 3.097, + "args": { + "External id": 234522,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097560102.207, "dur": 25.005, + "args": { + "External id": 234523,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097560138.311, "dur": 14.105, + "args": { + "External id": 234524,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097560160.259, "dur": 39.029, + "args": { + "External id": 234525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097560205.574, "dur": 35.245, + "args": { + "External id": 234526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097560247.580, "dur": 22.371, + "args": { + "External id": 234527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097560275.175, "dur": 30.855, + "args": { + "External id": 234528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097560311.341, "dur": 21.074, + "args": { + "External id": 234529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097560339.204, "dur": 29.833, + "args": { + "External id": 234530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097560388.758, "dur": 20.969, + "args": { + "External id": 234531,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097560426.145, "dur": 43.125, + "args": { + "External id": 234532,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097560490.604, "dur": 17.727, + "args": { + "External id": 234533,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097560523.618, "dur": 14.942, + "args": { + "External id": 234534,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097560550.287, "dur": 20.185, + "args": { + "External id": 234535,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560679.208, "dur": 15.917, + "args": { + "External id": 234536,"Record function id": 0, "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560682.379, "dur": 11.724, + "args": { + "External id": 234537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560686.899, "dur": 5.969, + "args": { + "External id": 234538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560688.146, "dur": 4.632, + "args": { + "External id": 234539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560698.832, "dur": 4.546, + "args": { + "External id": 234540,"Record function id": 0, "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560700.189, "dur": 2.767, + "args": { + "External id": 234541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560700.981, "dur": 1.518, + "args": { + "External id": 234542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560701.351, "dur": 1.056, + "args": { + "External id": 234543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560706.544, "dur": 4.713, + "args": { + "External id": 234544,"Record function id": 0, "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560707.997, "dur": 2.848, + "args": { + "External id": 234545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560708.694, "dur": 1.735, + "args": { + "External id": 234546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560709.103, "dur": 1.258, + "args": { + "External id": 234547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560714.480, "dur": 4.055, + "args": { + "External id": 234548,"Record function id": 0, "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560715.610, "dur": 2.520, + "args": { + "External id": 234549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560716.501, "dur": 1.237, + "args": { + "External id": 234550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560716.836, "dur": 0.827, + "args": { + "External id": 234551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560721.791, "dur": 4.154, + "args": { + "External id": 234552,"Record function id": 0, "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560722.845, "dur": 2.687, + "args": { + "External id": 234553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560723.626, "dur": 1.475, + "args": { + "External id": 234554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560724.034, "dur": 1.002, + "args": { + "External id": 234555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560732.087, "dur": 3.479, + "args": { + "External id": 234556,"Record function id": 0, "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560733.178, "dur": 1.958, + "args": { + "External id": 234557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560733.635, "dur": 1.111, + "args": { + "External id": 234558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560733.996, "dur": 0.682, + "args": { + "External id": 234559,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560738.587, "dur": 4.542, + "args": { + "External id": 234560,"Record function id": 0, "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560739.785, "dur": 2.949, + "args": { + "External id": 234561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560740.222, "dur": 2.007, + "args": { + "External id": 234562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560741.204, "dur": 0.951, + "args": { + "External id": 234563,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560746.065, "dur": 5.011, + "args": { + "External id": 234564,"Record function id": 0, "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560747.203, "dur": 3.452, + "args": { + "External id": 234565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560747.631, "dur": 2.569, + "args": { + "External id": 234566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560748.736, "dur": 1.401, + "args": { + "External id": 234567,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560754.295, "dur": 3.514, + "args": { + "External id": 234568,"Record function id": 0, "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097560755.308, "dur": 2.083, + "args": { + "External id": 234569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560755.787, "dur": 1.208, + "args": { + "External id": 234570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097560756.160, "dur": 0.761, + "args": { + "External id": 234571,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097560762.645, "dur": 37181.988, + "args": { + "External id": 234572,"Record function id": 0, "Sequence number": 959127, "Fwd thread id": 1, "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097560763.962, "dur": 37172.050, + "args": { + "External id": 234573,"Sequence number": 959127, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3148 + } + }, + { + "ph": "f", "id": 49, "pid": 2070552, "tid": 2107648, "ts": 5327097560763.962, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5327097560796.478, "dur": 38.610, + "args": { + "External id": 234574,"Record function id": 0, "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5327097560841.932, "dur": 72.074, + "args": { + "External id": 234575,"Record function id": 0, "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5327097560920.418, "dur": 37007.564, + "args": { + "External id": 234576,"Record function id": 0, "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097561024.080, "dur": 7.863, + "args": { + "External id": 234577,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097561041.931, "dur": 4.858, + "args": { + "External id": 234578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097561061.396, "dur": 36076.254, + "args": { + "External id": 234579,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097561074.231, "dur": 36054.872, + "args": { + "External id": 234580,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097561108.022, "dur": 14.085, + "args": { + "External id": 234581,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097561128.066, "dur": 35963.003, + "args": { + "External id": 234582,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097561130.452, "dur": 35959.865, + "args": { + "External id": 234583,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097561140.144, "dur": 5.031, + "args": { + "External id": 234584,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097561147.238, "dur": 35939.770, + "args": { + "External id": 234585,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097597223.525, "dur": 9.073, + "args": { + "External id": 234586,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097597226.037, "dur": 6.189, + "args": { + "External id": 234587,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097597258.854, "dur": 352.160, + "args": { + "External id": 234588,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097597284.455, "dur": 322.146, + "args": { + "External id": 234589,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3164, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097597294.397, "dur": 307.286, + "args": { + "External id": 234590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097597661.089, "dur": 3.435, + "args": { + "External id": 234591,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3166, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597725.985, "dur": 6.633, + "args": { + "External id": 234592,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597775.328, "dur": 1.380, + "args": { + "External id": 234593,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597791.944, "dur": 1.694, + "args": { + "External id": 234594,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597805.718, "dur": 0.984, + "args": { + "External id": 234595,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597819.279, "dur": 1.222, + "args": { + "External id": 234596,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597832.130, "dur": 0.931, + "args": { + "External id": 234597,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597844.154, "dur": 0.968, + "args": { + "External id": 234598,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597855.783, "dur": 1.762, + "args": { + "External id": 234599,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097597868.374, "dur": 0.863, + "args": { + "External id": 234600,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097597959.740, "dur": 2705.053, + "args": { + "External id": 234601,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5327097597996.440, "dur": 1018.415, + "args": { + "External id": 234602,"Record function id": 0, "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5327097598012.731, "dur": 315.600, + "args": { + "External id": 234603,"Record function id": 0, "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598099.676, "dur": 4.413, + "args": { + "External id": 234604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598108.120, "dur": 0.931, + "args": { + "External id": 234605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598111.066, "dur": 1.176, + "args": { + "External id": 234606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598114.425, "dur": 0.943, + "args": { + "External id": 234607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598117.077, "dur": 1.100, + "args": { + "External id": 234608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598119.631, "dur": 0.978, + "args": { + "External id": 234609,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598122.548, "dur": 1.922, + "args": { + "External id": 234610,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598125.938, "dur": 0.759, + "args": { + "External id": 234611,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598128.637, "dur": 1.245, + "args": { + "External id": 234612,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097598131.762, "dur": 1.037, + "args": { + "External id": 234613,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097598151.716, "dur": 148.138, + "args": { + "External id": 234614,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097598169.004, "dur": 126.393, + "args": { + "External id": 234615,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097598182.420, "dur": 12.426, + "args": { + "External id": 234616,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097598198.607, "dur": 67.944, + "args": { + "External id": 234617,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097598201.247, "dur": 64.975, + "args": { + "External id": 234618,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598205.859, "dur": 6.801, + "args": { + "External id": 234619,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097598214.314, "dur": 51.305, + "args": { + "External id": 234620,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2070552, "tid": 2107648, + "ts": 5327097598415.632, "dur": 590.456, + "args": { + "External id": 234621,"Record function id": 0, "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5327097598430.997, "dur": 543.001, + "args": { + "External id": 234622,"Record function id": 0, "Ev Idx": 3197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097598490.425, "dur": 5.116, + "args": { + "External id": 234623,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097598510.674, "dur": 28.902, + "args": { + "External id": 234624,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598515.734, "dur": 1.726, + "args": { + "External id": 234625,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598519.118, "dur": 0.935, + "args": { + "External id": 234626,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598522.933, "dur": 0.690, + "args": { + "External id": 234627,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598524.775, "dur": 0.745, + "args": { + "External id": 234628,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598527.022, "dur": 0.863, + "args": { + "External id": 234629,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598529.165, "dur": 0.593, + "args": { + "External id": 234630,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598531.105, "dur": 0.843, + "args": { + "External id": 234631,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598533.347, "dur": 1.012, + "args": { + "External id": 234632,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598535.527, "dur": 0.978, + "args": { + "External id": 234633,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097598550.098, "dur": 30.928, + "args": { + "External id": 234634,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097598611.368, "dur": 145.865, + "args": { + "External id": 234635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097598661.588, "dur": 5.007, + "args": { + "External id": 234636,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097598672.299, "dur": 10.512, + "args": { + "External id": 234637,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097598676.269, "dur": 6.112, + "args": { + "External id": 234638,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598679.985, "dur": 0.841, + "args": { + "External id": 234639,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097598692.410, "dur": 22.191, + "args": { + "External id": 234640,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598694.583, "dur": 0.850, + "args": { + "External id": 234641,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598696.850, "dur": 0.787, + "args": { + "External id": 234642,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598698.809, "dur": 0.607, + "args": { + "External id": 234643,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598701.168, "dur": 1.034, + "args": { + "External id": 234644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598703.808, "dur": 0.896, + "args": { + "External id": 234645,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598705.915, "dur": 0.586, + "args": { + "External id": 234646,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598707.804, "dur": 0.940, + "args": { + "External id": 234647,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598709.501, "dur": 0.640, + "args": { + "External id": 234648,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097598711.586, "dur": 0.567, + "args": { + "External id": 234649,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097598725.826, "dur": 23.704, + "args": { + "External id": 234650,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097598801.736, "dur": 108.537, + "args": { + "External id": 234651,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097598825.460, "dur": 81.546, + "args": { + "External id": 234652,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3227, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097598834.468, "dur": 68.116, + "args": { + "External id": 234653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097598924.797, "dur": 2.014, + "args": { + "External id": 234654,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3229, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097599022.715, "dur": 1584.027, + "args": { + "External id": 234655,"Sequence number": 959126, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3230 + } + }, + { + "ph": "f", "id": 50, "pid": 2070552, "tid": 2107648, "ts": 5327097599022.715, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599134.632, "dur": 105.558, + "args": { + "External id": 234656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097599284.572, "dur": 40.707, + "args": { + "External id": 234657,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599341.882, "dur": 48.268, + "args": { + "External id": 234658,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599400.548, "dur": 31.583, + "args": { + "External id": 234659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599438.254, "dur": 46.069, + "args": { + "External id": 234660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599491.055, "dur": 27.958, + "args": { + "External id": 234661,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599525.191, "dur": 42.600, + "args": { + "External id": 234662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097599589.240, "dur": 20.998, + "args": { + "External id": 234663,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097599677.161, "dur": 31.362, + "args": { + "External id": 234664,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097599732.192, "dur": 19.486, + "args": { + "External id": 234665,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097599765.194, "dur": 15.300, + "args": { + "External id": 234666,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599788.899, "dur": 33.462, + "args": { + "External id": 234667,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097599825.473, "dur": 33.434, + "args": { + "External id": 234668,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097599886.257, "dur": 179.638, + "args": { + "External id": 234669,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097599957.876, "dur": 6.132, + "args": { + "External id": 234670,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097599965.714, "dur": 3.307, + "args": { + "External id": 234671,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097600100.318, "dur": 27.233, + "args": { + "External id": 234672,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097600139.372, "dur": 14.478, + "args": { + "External id": 234673,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097600162.502, "dur": 39.973, + "args": { + "External id": 234674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097600208.457, "dur": 35.797, + "args": { + "External id": 234675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097600251.400, "dur": 22.790, + "args": { + "External id": 234676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097600278.740, "dur": 32.474, + "args": { + "External id": 234677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097600316.785, "dur": 21.417, + "args": { + "External id": 234678,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097600344.543, "dur": 32.234, + "args": { + "External id": 234679,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097600394.763, "dur": 35.233, + "args": { + "External id": 234680,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097600457.852, "dur": 27.126, + "args": { + "External id": 234681,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097600502.186, "dur": 16.938, + "args": { + "External id": 234682,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097600534.249, "dur": 14.549, + "args": { + "External id": 234683,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097600560.986, "dur": 18.526, + "args": { + "External id": 234684,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600690.539, "dur": 16.717, + "args": { + "External id": 234685,"Record function id": 0, "Ev Idx": 3260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600694.076, "dur": 12.142, + "args": { + "External id": 234686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600698.310, "dur": 6.437, + "args": { + "External id": 234687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600700.144, "dur": 4.498, + "args": { + "External id": 234688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600710.924, "dur": 4.721, + "args": { + "External id": 234689,"Record function id": 0, "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600712.254, "dur": 2.889, + "args": { + "External id": 234690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600713.441, "dur": 1.277, + "args": { + "External id": 234691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600713.787, "dur": 0.849, + "args": { + "External id": 234692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600718.816, "dur": 4.869, + "args": { + "External id": 234693,"Record function id": 0, "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600720.587, "dur": 2.682, + "args": { + "External id": 234694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600721.189, "dur": 1.668, + "args": { + "External id": 234695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600721.569, "dur": 1.214, + "args": { + "External id": 234696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600726.750, "dur": 5.241, + "args": { + "External id": 234697,"Record function id": 0, "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600728.688, "dur": 2.870, + "args": { + "External id": 234698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600729.455, "dur": 1.437, + "args": { + "External id": 234699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600729.903, "dur": 0.923, + "args": { + "External id": 234700,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600735.033, "dur": 4.068, + "args": { + "External id": 234701,"Record function id": 0, "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600736.365, "dur": 2.350, + "args": { + "External id": 234702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600736.775, "dur": 1.551, + "args": { + "External id": 234703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600737.576, "dur": 0.680, + "args": { + "External id": 234704,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600742.167, "dur": 8.107, + "args": { + "External id": 234705,"Record function id": 0, "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600746.826, "dur": 3.066, + "args": { + "External id": 234706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600747.548, "dur": 1.928, + "args": { + "External id": 234707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600747.945, "dur": 1.463, + "args": { + "External id": 234708,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600753.351, "dur": 4.594, + "args": { + "External id": 234709,"Record function id": 0, "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600754.473, "dur": 3.050, + "args": { + "External id": 234710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600755.127, "dur": 2.007, + "args": { + "External id": 234711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600755.950, "dur": 1.110, + "args": { + "External id": 234712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600760.976, "dur": 4.524, + "args": { + "External id": 234713,"Record function id": 0, "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600762.458, "dur": 2.649, + "args": { + "External id": 234714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600763.069, "dur": 1.630, + "args": { + "External id": 234715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600763.780, "dur": 0.853, + "args": { + "External id": 234716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600768.598, "dur": 4.217, + "args": { + "External id": 234717,"Record function id": 0, "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097600769.870, "dur": 2.534, + "args": { + "External id": 234718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600770.520, "dur": 1.499, + "args": { + "External id": 234719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097600771.074, "dur": 0.879, + "args": { + "External id": 234720,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097600777.147, "dur": 36135.804, + "args": { + "External id": 234721,"Record function id": 0, "Sequence number": 959125, "Fwd thread id": 1, "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097600778.561, "dur": 36125.417, + "args": { + "External id": 234722,"Sequence number": 959125, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3297 + } + }, + { + "ph": "f", "id": 51, "pid": 2070552, "tid": 2107648, "ts": 5327097600778.561, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5327097600811.158, "dur": 39.630, + "args": { + "External id": 234723,"Record function id": 0, "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5327097600858.502, "dur": 73.763, + "args": { + "External id": 234724,"Record function id": 0, "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5327097600938.183, "dur": 35957.911, + "args": { + "External id": 234725,"Record function id": 0, "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097601044.985, "dur": 7.065, + "args": { + "External id": 234726,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097601062.342, "dur": 5.053, + "args": { + "External id": 234727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097601081.934, "dur": 34996.437, + "args": { + "External id": 234728,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097601095.604, "dur": 34973.718, + "args": { + "External id": 234729,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097601128.509, "dur": 13.982, + "args": { + "External id": 234730,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097601148.509, "dur": 34879.705, + "args": { + "External id": 234731,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097601151.746, "dur": 34875.746, + "args": { + "External id": 234732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097601155.547, "dur": 5.235, + "args": { + "External id": 234733,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097601162.354, "dur": 34861.744, + "args": { + "External id": 234734,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097636166.233, "dur": 9.225, + "args": { + "External id": 234735,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097636168.887, "dur": 6.278, + "args": { + "External id": 234736,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097636201.575, "dur": 378.529, + "args": { + "External id": 234737,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097636227.341, "dur": 347.642, + "args": { + "External id": 234738,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3313, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097636238.295, "dur": 330.982, + "args": { + "External id": 234739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097636599.966, "dur": 2.059, + "args": { + "External id": 234740,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3315, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636693.513, "dur": 6.808, + "args": { + "External id": 234741,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636743.797, "dur": 1.635, + "args": { + "External id": 234742,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636760.905, "dur": 1.527, + "args": { + "External id": 234743,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636774.550, "dur": 1.033, + "args": { + "External id": 234744,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636787.975, "dur": 1.013, + "args": { + "External id": 234745,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636799.457, "dur": 0.831, + "args": { + "External id": 234746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636811.783, "dur": 1.008, + "args": { + "External id": 234747,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636823.927, "dur": 1.989, + "args": { + "External id": 234748,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097636836.028, "dur": 1.126, + "args": { + "External id": 234749,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097636928.249, "dur": 2647.235, + "args": { + "External id": 234750,"Record function id": 0, "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5327097636947.687, "dur": 998.940, + "args": { + "External id": 234751,"Record function id": 0, "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5327097636962.280, "dur": 328.137, + "args": { + "External id": 234752,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637065.720, "dur": 4.753, + "args": { + "External id": 234753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637073.876, "dur": 1.196, + "args": { + "External id": 234754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637077.188, "dur": 0.884, + "args": { + "External id": 234755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637080.400, "dur": 1.023, + "args": { + "External id": 234756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637082.874, "dur": 0.998, + "args": { + "External id": 234757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637085.090, "dur": 0.909, + "args": { + "External id": 234758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637087.576, "dur": 1.487, + "args": { + "External id": 234759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637091.148, "dur": 0.766, + "args": { + "External id": 234760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637093.803, "dur": 1.075, + "args": { + "External id": 234761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097637096.584, "dur": 0.817, + "args": { + "External id": 234762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097637116.228, "dur": 146.623, + "args": { + "External id": 234763,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097637132.702, "dur": 125.552, + "args": { + "External id": 234764,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097637146.970, "dur": 11.844, + "args": { + "External id": 234765,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097637162.491, "dur": 67.591, + "args": { + "External id": 234766,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097637165.278, "dur": 64.500, + "args": { + "External id": 234767,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637169.173, "dur": 6.886, + "args": { + "External id": 234768,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097637177.613, "dur": 51.540, + "args": { + "External id": 234769,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2070552, "tid": 2107648, + "ts": 5327097637374.675, "dur": 564.719, + "args": { + "External id": 234770,"Record function id": 0, "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5327097637390.711, "dur": 536.752, + "args": { + "External id": 234771,"Record function id": 0, "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097637450.059, "dur": 4.877, + "args": { + "External id": 234772,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097637469.635, "dur": 28.990, + "args": { + "External id": 234773,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637474.026, "dur": 2.809, + "args": { + "External id": 234774,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637478.608, "dur": 0.611, + "args": { + "External id": 234775,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637480.171, "dur": 0.665, + "args": { + "External id": 234776,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637483.727, "dur": 0.590, + "args": { + "External id": 234777,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637485.960, "dur": 0.681, + "args": { + "External id": 234778,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637487.925, "dur": 0.791, + "args": { + "External id": 234779,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637490.897, "dur": 0.617, + "args": { + "External id": 234780,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637492.700, "dur": 0.631, + "args": { + "External id": 234781,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637494.480, "dur": 1.467, + "args": { + "External id": 234782,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097637508.267, "dur": 31.026, + "args": { + "External id": 234783,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097637568.323, "dur": 142.076, + "args": { + "External id": 234784,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097637577.456, "dur": 3.398, + "args": { + "External id": 234785,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097637585.316, "dur": 10.123, + "args": { + "External id": 234786,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097637589.259, "dur": 5.727, + "args": { + "External id": 234787,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637592.698, "dur": 1.121, + "args": { + "External id": 234788,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097637601.674, "dur": 62.366, + "args": { + "External id": 234789,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637603.446, "dur": 0.989, + "args": { + "External id": 234790,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637606.130, "dur": 0.709, + "args": { + "External id": 234791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637607.641, "dur": 1.753, + "args": { + "External id": 234792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637611.328, "dur": 0.420, + "args": { + "External id": 234793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637613.267, "dur": 0.701, + "args": { + "External id": 234794,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637615.595, "dur": 0.576, + "args": { + "External id": 234795,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637617.408, "dur": 0.544, + "args": { + "External id": 234796,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637656.689, "dur": 0.815, + "args": { + "External id": 234797,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097637659.977, "dur": 0.673, + "args": { + "External id": 234798,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097637677.630, "dur": 24.715, + "args": { + "External id": 234799,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097637754.189, "dur": 110.643, + "args": { + "External id": 234800,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097637778.937, "dur": 82.603, + "args": { + "External id": 234801,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3376, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097637787.531, "dur": 69.634, + "args": { + "External id": 234802,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097637879.037, "dur": 2.028, + "args": { + "External id": 234803,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3378, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097637952.968, "dur": 1598.439, + "args": { + "External id": 234804,"Sequence number": 959124, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3379 + } + }, + { + "ph": "f", "id": 52, "pid": 2070552, "tid": 2107648, "ts": 5327097637952.968, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638082.463, "dur": 104.391, + "args": { + "External id": 234805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097638225.786, "dur": 39.967, + "args": { + "External id": 234806,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638281.674, "dur": 48.684, + "args": { + "External id": 234807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638341.134, "dur": 32.210, + "args": { + "External id": 234808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638379.608, "dur": 45.530, + "args": { + "External id": 234809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638431.659, "dur": 29.112, + "args": { + "External id": 234810,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638468.129, "dur": 41.426, + "args": { + "External id": 234811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097638531.034, "dur": 23.663, + "args": { + "External id": 234812,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097638574.043, "dur": 27.418, + "args": { + "External id": 234813,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097638659.578, "dur": 23.898, + "args": { + "External id": 234814,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097638699.524, "dur": 15.411, + "args": { + "External id": 234815,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638724.342, "dur": 34.434, + "args": { + "External id": 234816,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097638762.018, "dur": 33.106, + "args": { + "External id": 234817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097638822.468, "dur": 190.602, + "args": { + "External id": 234818,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097638898.174, "dur": 6.167, + "args": { + "External id": 234819,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097638906.027, "dur": 3.487, + "args": { + "External id": 234820,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097639047.608, "dur": 26.596, + "args": { + "External id": 234821,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097639086.297, "dur": 14.754, + "args": { + "External id": 234822,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097639109.986, "dur": 38.667, + "args": { + "External id": 234823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097639154.580, "dur": 34.596, + "args": { + "External id": 234824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097639195.645, "dur": 22.465, + "args": { + "External id": 234825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097639224.084, "dur": 30.067, + "args": { + "External id": 234826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097639260.105, "dur": 20.876, + "args": { + "External id": 234827,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097639288.635, "dur": 29.339, + "args": { + "External id": 234828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097639334.962, "dur": 20.618, + "args": { + "External id": 234829,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097639372.809, "dur": 40.951, + "args": { + "External id": 234830,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097639436.025, "dur": 21.792, + "args": { + "External id": 234831,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097639477.759, "dur": 15.753, + "args": { + "External id": 234832,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097639505.495, "dur": 17.015, + "args": { + "External id": 234833,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639597.171, "dur": 14.984, + "args": { + "External id": 234834,"Record function id": 0, "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639600.546, "dur": 10.682, + "args": { + "External id": 234835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639604.573, "dur": 5.735, + "args": { + "External id": 234836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639605.835, "dur": 4.388, + "args": { + "External id": 234837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639615.927, "dur": 45.638, + "args": { + "External id": 234838,"Record function id": 0, "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639617.383, "dur": 42.870, + "args": { + "External id": 234839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639655.662, "dur": 3.471, + "args": { + "External id": 234840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639656.616, "dur": 2.222, + "args": { + "External id": 234841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639666.915, "dur": 5.417, + "args": { + "External id": 234842,"Record function id": 0, "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639668.804, "dur": 3.094, + "args": { + "External id": 234843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639669.781, "dur": 1.668, + "args": { + "External id": 234844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639670.535, "dur": 0.829, + "args": { + "External id": 234845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639675.440, "dur": 4.409, + "args": { + "External id": 234846,"Record function id": 0, "Ev Idx": 3421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639676.810, "dur": 2.616, + "args": { + "External id": 234847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639677.470, "dur": 1.443, + "args": { + "External id": 234848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639677.933, "dur": 0.910, + "args": { + "External id": 234849,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639682.890, "dur": 7.365, + "args": { + "External id": 234850,"Record function id": 0, "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639683.958, "dur": 5.905, + "args": { + "External id": 234851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639684.469, "dur": 4.961, + "args": { + "External id": 234852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639684.892, "dur": 4.464, + "args": { + "External id": 234853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639693.323, "dur": 3.586, + "args": { + "External id": 234854,"Record function id": 0, "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639694.461, "dur": 2.061, + "args": { + "External id": 234855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639694.946, "dur": 1.179, + "args": { + "External id": 234856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639695.337, "dur": 0.717, + "args": { + "External id": 234857,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639700.127, "dur": 4.288, + "args": { + "External id": 234858,"Record function id": 0, "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639701.209, "dur": 2.812, + "args": { + "External id": 234859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639701.709, "dur": 1.920, + "args": { + "External id": 234860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639702.622, "dur": 0.940, + "args": { + "External id": 234861,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639707.441, "dur": 4.284, + "args": { + "External id": 234862,"Record function id": 0, "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639708.509, "dur": 2.816, + "args": { + "External id": 234863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639708.989, "dur": 1.913, + "args": { + "External id": 234864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639709.608, "dur": 1.220, + "args": { + "External id": 234865,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639714.757, "dur": 4.152, + "args": { + "External id": 234866,"Record function id": 0, "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097639715.833, "dur": 2.683, + "args": { + "External id": 234867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639716.469, "dur": 1.638, + "args": { + "External id": 234868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097639717.054, "dur": 0.977, + "args": { + "External id": 234869,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097639723.301, "dur": 36527.798, + "args": { + "External id": 234870,"Record function id": 0, "Sequence number": 959123, "Fwd thread id": 1, "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097639724.504, "dur": 36516.825, + "args": { + "External id": 234871,"Sequence number": 959123, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3446 + } + }, + { + "ph": "f", "id": 53, "pid": 2070552, "tid": 2107648, "ts": 5327097639724.504, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5327097639755.474, "dur": 39.242, + "args": { + "External id": 234872,"Record function id": 0, "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5327097639802.258, "dur": 69.869, + "args": { + "External id": 234873,"Record function id": 0, "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5327097639878.158, "dur": 36355.337, + "args": { + "External id": 234874,"Record function id": 0, "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097639970.709, "dur": 20.153, + "args": { + "External id": 234875,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097640003.058, "dur": 4.851, + "args": { + "External id": 234876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097640022.727, "dur": 35318.764, + "args": { + "External id": 234877,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097640035.676, "dur": 35297.466, + "args": { + "External id": 234878,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097640074.880, "dur": 14.608, + "args": { + "External id": 234879,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097640095.379, "dur": 35201.208, + "args": { + "External id": 234880,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097640099.052, "dur": 35196.908, + "args": { + "External id": 234881,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097640102.735, "dur": 5.072, + "args": { + "External id": 234882,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097640113.078, "dur": 35179.708, + "args": { + "External id": 234883,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097675427.556, "dur": 8.460, + "args": { + "External id": 234884,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097675430.178, "dur": 5.461, + "args": { + "External id": 234885,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097675463.101, "dur": 465.245, + "args": { + "External id": 234886,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097675488.306, "dur": 434.881, + "args": { + "External id": 234887,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3462, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097675499.232, "dur": 417.728, + "args": { + "External id": 234888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097675952.846, "dur": 2.130, + "args": { + "External id": 234889,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3464, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676032.767, "dur": 6.977, + "args": { + "External id": 234890,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676082.227, "dur": 1.237, + "args": { + "External id": 234891,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676098.714, "dur": 1.461, + "args": { + "External id": 234892,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676111.440, "dur": 1.174, + "args": { + "External id": 234893,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676124.374, "dur": 0.985, + "args": { + "External id": 234894,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676135.933, "dur": 1.091, + "args": { + "External id": 234895,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676148.285, "dur": 1.055, + "args": { + "External id": 234896,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676159.783, "dur": 2.248, + "args": { + "External id": 234897,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676171.540, "dur": 1.166, + "args": { + "External id": 234898,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097676265.588, "dur": 2636.194, + "args": { + "External id": 234899,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5327097676284.320, "dur": 993.007, + "args": { + "External id": 234900,"Record function id": 0, "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5327097676298.772, "dur": 296.178, + "args": { + "External id": 234901,"Record function id": 0, "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676380.108, "dur": 4.073, + "args": { + "External id": 234902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676387.292, "dur": 0.951, + "args": { + "External id": 234903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676389.778, "dur": 1.064, + "args": { + "External id": 234904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676392.299, "dur": 0.846, + "args": { + "External id": 234905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676394.969, "dur": 1.408, + "args": { + "External id": 234906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676397.880, "dur": 0.785, + "args": { + "External id": 234907,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676400.018, "dur": 1.049, + "args": { + "External id": 234908,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676402.791, "dur": 1.136, + "args": { + "External id": 234909,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676405.406, "dur": 1.397, + "args": { + "External id": 234910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097676408.124, "dur": 1.460, + "args": { + "External id": 234911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097676427.547, "dur": 142.294, + "args": { + "External id": 234912,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097676443.360, "dur": 122.342, + "args": { + "External id": 234913,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097676456.728, "dur": 11.899, + "args": { + "External id": 234914,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097676472.505, "dur": 64.761, + "args": { + "External id": 234915,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097676474.815, "dur": 62.082, + "args": { + "External id": 234916,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676478.707, "dur": 6.206, + "args": { + "External id": 234917,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097676486.225, "dur": 50.140, + "args": { + "External id": 234918,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2070552, "tid": 2107648, + "ts": 5327097676722.702, "dur": 547.675, + "args": { + "External id": 234919,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5327097676740.920, "dur": 518.197, + "args": { + "External id": 234920,"Record function id": 0, "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097676802.796, "dur": 5.617, + "args": { + "External id": 234921,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097676823.501, "dur": 30.858, + "args": { + "External id": 234922,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676828.221, "dur": 2.222, + "args": { + "External id": 234923,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676832.462, "dur": 0.850, + "args": { + "External id": 234924,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676834.976, "dur": 1.049, + "args": { + "External id": 234925,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676838.962, "dur": 0.562, + "args": { + "External id": 234926,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676841.008, "dur": 0.642, + "args": { + "External id": 234927,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676842.900, "dur": 1.087, + "args": { + "External id": 234928,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676846.180, "dur": 0.671, + "args": { + "External id": 234929,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676848.051, "dur": 0.796, + "args": { + "External id": 234930,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676849.677, "dur": 1.309, + "args": { + "External id": 234931,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097676864.738, "dur": 33.771, + "args": { + "External id": 234932,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097676930.243, "dur": 118.993, + "args": { + "External id": 234933,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097676939.729, "dur": 4.194, + "args": { + "External id": 234934,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097676948.729, "dur": 10.074, + "args": { + "External id": 234935,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097676952.625, "dur": 5.737, + "args": { + "External id": 234936,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676956.125, "dur": 0.794, + "args": { + "External id": 234937,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097676966.098, "dur": 41.306, + "args": { + "External id": 234938,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676967.957, "dur": 0.739, + "args": { + "External id": 234939,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676970.312, "dur": 1.157, + "args": { + "External id": 234940,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676972.489, "dur": 0.941, + "args": { + "External id": 234941,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676974.643, "dur": 0.583, + "args": { + "External id": 234942,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676992.078, "dur": 0.935, + "args": { + "External id": 234943,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676995.139, "dur": 0.829, + "args": { + "External id": 234944,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676996.816, "dur": 0.945, + "args": { + "External id": 234945,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097676998.773, "dur": 0.943, + "args": { + "External id": 234946,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097677000.909, "dur": 0.578, + "args": { + "External id": 234947,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097677018.662, "dur": 23.219, + "args": { + "External id": 234948,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097677092.715, "dur": 104.372, + "args": { + "External id": 234949,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097677114.284, "dur": 79.661, + "args": { + "External id": 234950,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3525, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097677122.892, "dur": 67.181, + "args": { + "External id": 234951,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097677210.931, "dur": 1.685, + "args": { + "External id": 234952,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3527, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097677284.277, "dur": 1595.526, + "args": { + "External id": 234953,"Sequence number": 959122, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3528 + } + }, + { + "ph": "f", "id": 54, "pid": 2070552, "tid": 2107648, "ts": 5327097677284.277, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097677389.810, "dur": 102.380, + "args": { + "External id": 234954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097677528.331, "dur": 39.144, + "args": { + "External id": 234955,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097677584.829, "dur": 88.926, + "args": { + "External id": 234956,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097677688.644, "dur": 37.806, + "args": { + "External id": 234957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097677732.398, "dur": 45.571, + "args": { + "External id": 234958,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097677784.922, "dur": 27.580, + "args": { + "External id": 234959,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097677819.480, "dur": 42.526, + "args": { + "External id": 234960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097677883.838, "dur": 22.457, + "args": { + "External id": 234961,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097677925.298, "dur": 27.264, + "args": { + "External id": 234962,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097677972.121, "dur": 36.759, + "args": { + "External id": 234963,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097678028.008, "dur": 15.352, + "args": { + "External id": 234964,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678053.460, "dur": 36.266, + "args": { + "External id": 234965,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678093.231, "dur": 32.482, + "args": { + "External id": 234966,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097678156.011, "dur": 164.246, + "args": { + "External id": 234967,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097678227.409, "dur": 6.736, + "args": { + "External id": 234968,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097678235.862, "dur": 3.189, + "args": { + "External id": 234969,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097678350.842, "dur": 25.756, + "args": { + "External id": 234970,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097678388.176, "dur": 13.804, + "args": { + "External id": 234971,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678409.796, "dur": 32.907, + "args": { + "External id": 234972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678448.918, "dur": 32.703, + "args": { + "External id": 234973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678487.396, "dur": 21.849, + "args": { + "External id": 234974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678514.068, "dur": 29.332, + "args": { + "External id": 234975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678548.851, "dur": 21.394, + "args": { + "External id": 234976,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097678577.336, "dur": 30.376, + "args": { + "External id": 234977,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097678664.910, "dur": 25.357, + "args": { + "External id": 234978,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097678708.385, "dur": 37.470, + "args": { + "External id": 234979,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097678767.912, "dur": 20.095, + "args": { + "External id": 234980,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097678804.859, "dur": 14.820, + "args": { + "External id": 234981,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097678831.257, "dur": 15.753, + "args": { + "External id": 234982,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678926.536, "dur": 16.027, + "args": { + "External id": 234983,"Record function id": 0, "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678929.710, "dur": 11.924, + "args": { + "External id": 234984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678934.034, "dur": 6.833, + "args": { + "External id": 234985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678935.399, "dur": 5.333, + "args": { + "External id": 234986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678946.243, "dur": 4.806, + "args": { + "External id": 234987,"Record function id": 0, "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678947.671, "dur": 2.958, + "args": { + "External id": 234988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678948.393, "dur": 1.731, + "args": { + "External id": 234989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678948.999, "dur": 1.032, + "args": { + "External id": 234990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678954.151, "dur": 4.614, + "args": { + "External id": 234991,"Record function id": 0, "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678955.837, "dur": 2.505, + "args": { + "External id": 234992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678956.406, "dur": 1.508, + "args": { + "External id": 234993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678956.845, "dur": 0.993, + "args": { + "External id": 234994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678961.840, "dur": 3.979, + "args": { + "External id": 234995,"Record function id": 0, "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678962.696, "dur": 2.726, + "args": { + "External id": 234996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678963.197, "dur": 1.662, + "args": { + "External id": 234997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678963.563, "dur": 1.220, + "args": { + "External id": 234998,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678968.838, "dur": 21.279, + "args": { + "External id": 234999,"Record function id": 0, "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678969.990, "dur": 5.775, + "args": { + "External id": 235000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678973.798, "dur": 1.339, + "args": { + "External id": 235001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678974.302, "dur": 0.743, + "args": { + "External id": 235002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678996.464, "dur": 6.175, + "args": { + "External id": 235003,"Record function id": 0, "Ev Idx": 3578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097678998.076, "dur": 4.094, + "args": { + "External id": 235004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678999.076, "dur": 2.376, + "args": { + "External id": 235005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097678999.649, "dur": 1.715, + "args": { + "External id": 235006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097679005.751, "dur": 10.816, + "args": { + "External id": 235007,"Record function id": 0, "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097679007.240, "dur": 8.849, + "args": { + "External id": 235008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097679013.955, "dur": 1.541, + "args": { + "External id": 235009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097679014.301, "dur": 1.081, + "args": { + "External id": 235010,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097679019.802, "dur": 3.630, + "args": { + "External id": 235011,"Record function id": 0, "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097679020.833, "dur": 2.184, + "args": { + "External id": 235012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097679021.309, "dur": 1.329, + "args": { + "External id": 235013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097679021.588, "dur": 0.983, + "args": { + "External id": 235014,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097679026.626, "dur": 7.476, + "args": { + "External id": 235015,"Record function id": 0, "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097679028.002, "dur": 5.683, + "args": { + "External id": 235016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097679028.479, "dur": 4.656, + "args": { + "External id": 235017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097679028.836, "dur": 4.226, + "args": { + "External id": 235018,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097679038.128, "dur": 37014.964, + "args": { + "External id": 235019,"Record function id": 0, "Sequence number": 959121, "Fwd thread id": 1, "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097679039.559, "dur": 37004.555, + "args": { + "External id": 235020,"Sequence number": 959121, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3595 + } + }, + { + "ph": "f", "id": 55, "pid": 2070552, "tid": 2107648, "ts": 5327097679039.559, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5327097679068.282, "dur": 41.621, + "args": { + "External id": 235021,"Record function id": 0, "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5327097679117.626, "dur": 67.640, + "args": { + "External id": 235022,"Record function id": 0, "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5327097679190.975, "dur": 36845.271, + "args": { + "External id": 235023,"Record function id": 0, "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097679284.476, "dur": 6.433, + "args": { + "External id": 235024,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097679300.124, "dur": 4.821, + "args": { + "External id": 235025,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097679318.458, "dur": 35844.016, + "args": { + "External id": 235026,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097679331.010, "dur": 35822.081, + "args": { + "External id": 235027,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097679383.288, "dur": 13.793, + "args": { + "External id": 235028,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097679403.068, "dur": 35708.899, + "args": { + "External id": 235029,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097679405.324, "dur": 35706.037, + "args": { + "External id": 235030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097679409.532, "dur": 5.098, + "args": { + "External id": 235031,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097679416.064, "dur": 35691.978, + "args": { + "External id": 235032,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097715251.607, "dur": 8.591, + "args": { + "External id": 235033,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097715254.112, "dur": 5.776, + "args": { + "External id": 235034,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097715287.230, "dur": 446.314, + "args": { + "External id": 235035,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097715310.874, "dur": 417.585, + "args": { + "External id": 235036,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3611, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097715320.634, "dur": 401.971, + "args": { + "External id": 235037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097715755.146, "dur": 2.239, + "args": { + "External id": 235038,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3613, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715819.312, "dur": 6.575, + "args": { + "External id": 235039,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715867.716, "dur": 1.345, + "args": { + "External id": 235040,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715883.768, "dur": 1.245, + "args": { + "External id": 235041,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715896.695, "dur": 0.911, + "args": { + "External id": 235042,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715910.909, "dur": 1.055, + "args": { + "External id": 235043,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715923.962, "dur": 0.935, + "args": { + "External id": 235044,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715935.492, "dur": 0.994, + "args": { + "External id": 235045,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715946.813, "dur": 1.734, + "args": { + "External id": 235046,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097715958.777, "dur": 1.284, + "args": { + "External id": 235047,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097716069.710, "dur": 2683.078, + "args": { + "External id": 235048,"Record function id": 0, "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5327097716089.613, "dur": 1006.190, + "args": { + "External id": 235049,"Record function id": 0, "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5327097716104.476, "dur": 305.150, + "args": { + "External id": 235050,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716188.914, "dur": 4.665, + "args": { + "External id": 235051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716197.031, "dur": 1.466, + "args": { + "External id": 235052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716200.120, "dur": 1.265, + "args": { + "External id": 235053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716202.646, "dur": 1.428, + "args": { + "External id": 235054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716205.747, "dur": 1.469, + "args": { + "External id": 235055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716208.522, "dur": 1.592, + "args": { + "External id": 235056,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716212.008, "dur": 1.747, + "args": { + "External id": 235057,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716215.050, "dur": 1.780, + "args": { + "External id": 235058,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716218.267, "dur": 1.423, + "args": { + "External id": 235059,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097716221.255, "dur": 1.726, + "args": { + "External id": 235060,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097716240.599, "dur": 141.933, + "args": { + "External id": 235061,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097716256.205, "dur": 122.087, + "args": { + "External id": 235062,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097716268.694, "dur": 13.410, + "args": { + "External id": 235063,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097716285.637, "dur": 65.071, + "args": { + "External id": 235064,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097716287.848, "dur": 62.585, + "args": { + "External id": 235065,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716291.478, "dur": 6.622, + "args": { + "External id": 235066,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097716299.930, "dur": 49.843, + "args": { + "External id": 235067,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2070552, "tid": 2107648, + "ts": 5327097716495.000, "dur": 592.986, + "args": { + "External id": 235068,"Record function id": 0, "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5327097716510.282, "dur": 564.913, + "args": { + "External id": 235069,"Record function id": 0, "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097716567.035, "dur": 4.566, + "args": { + "External id": 235070,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097716586.786, "dur": 31.454, + "args": { + "External id": 235071,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716590.946, "dur": 1.840, + "args": { + "External id": 235072,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716594.597, "dur": 0.759, + "args": { + "External id": 235073,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716596.914, "dur": 0.789, + "args": { + "External id": 235074,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716599.845, "dur": 0.764, + "args": { + "External id": 235075,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716602.910, "dur": 0.579, + "args": { + "External id": 235076,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716604.771, "dur": 0.718, + "args": { + "External id": 235077,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716606.747, "dur": 0.710, + "args": { + "External id": 235078,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716609.028, "dur": 0.637, + "args": { + "External id": 235079,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716611.088, "dur": 0.609, + "args": { + "External id": 235080,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097716665.503, "dur": 36.849, + "args": { + "External id": 235081,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097716734.119, "dur": 101.144, + "args": { + "External id": 235082,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097716744.598, "dur": 4.067, + "args": { + "External id": 235083,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097716753.496, "dur": 10.549, + "args": { + "External id": 235084,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097716757.545, "dur": 6.081, + "args": { + "External id": 235085,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716760.864, "dur": 0.807, + "args": { + "External id": 235086,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097716771.256, "dur": 26.782, + "args": { + "External id": 235087,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716773.097, "dur": 0.871, + "args": { + "External id": 235088,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716775.479, "dur": 0.969, + "args": { + "External id": 235089,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716777.924, "dur": 0.636, + "args": { + "External id": 235090,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716780.094, "dur": 0.617, + "args": { + "External id": 235091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716782.121, "dur": 0.669, + "args": { + "External id": 235092,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716784.228, "dur": 0.898, + "args": { + "External id": 235093,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716786.451, "dur": 0.624, + "args": { + "External id": 235094,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716788.527, "dur": 0.700, + "args": { + "External id": 235095,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097716790.615, "dur": 0.841, + "args": { + "External id": 235096,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097716808.021, "dur": 19.651, + "args": { + "External id": 235097,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097716879.568, "dur": 126.411, + "args": { + "External id": 235098,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097716901.074, "dur": 101.393, + "args": { + "External id": 235099,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097716910.079, "dur": 85.284, + "args": { + "External id": 235100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097717021.902, "dur": 2.165, + "args": { + "External id": 235101,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097717103.157, "dur": 1624.532, + "args": { + "External id": 235102,"Sequence number": 959120, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3677 + } + }, + { + "ph": "f", "id": 56, "pid": 2070552, "tid": 2107648, "ts": 5327097717103.157, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717211.675, "dur": 105.390, + "args": { + "External id": 235103,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097717354.355, "dur": 39.353, + "args": { + "External id": 235104,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717410.179, "dur": 50.897, + "args": { + "External id": 235105,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717471.110, "dur": 32.480, + "args": { + "External id": 235106,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717510.017, "dur": 47.567, + "args": { + "External id": 235107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717563.912, "dur": 27.481, + "args": { + "External id": 235108,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717599.525, "dur": 86.499, + "args": { + "External id": 235109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097717713.199, "dur": 25.195, + "args": { + "External id": 235110,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097717756.686, "dur": 28.448, + "args": { + "External id": 235111,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097717805.706, "dur": 19.061, + "args": { + "External id": 235112,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097717838.505, "dur": 16.052, + "args": { + "External id": 235113,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717863.743, "dur": 33.226, + "args": { + "External id": 235114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097717900.486, "dur": 32.811, + "args": { + "External id": 235115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097717961.195, "dur": 189.865, + "args": { + "External id": 235116,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097718054.265, "dur": 7.260, + "args": { + "External id": 235117,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097718063.572, "dur": 3.625, + "args": { + "External id": 235118,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097718184.356, "dur": 26.751, + "args": { + "External id": 235119,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097718222.180, "dur": 14.961, + "args": { + "External id": 235120,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097718245.478, "dur": 41.460, + "args": { + "External id": 235121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097718292.904, "dur": 36.363, + "args": { + "External id": 235122,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097718335.826, "dur": 22.201, + "args": { + "External id": 235123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097718362.865, "dur": 30.227, + "args": { + "External id": 235124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097718400.915, "dur": 21.395, + "args": { + "External id": 235125,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097718429.727, "dur": 29.253, + "args": { + "External id": 235126,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097718473.934, "dur": 22.562, + "args": { + "External id": 235127,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097718528.526, "dur": 29.924, + "args": { + "External id": 235128,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097718573.295, "dur": 17.359, + "args": { + "External id": 235129,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097718605.584, "dur": 54.312, + "args": { + "External id": 235130,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097718677.202, "dur": 18.268, + "args": { + "External id": 235131,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718775.209, "dur": 15.700, + "args": { + "External id": 235132,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718778.313, "dur": 11.515, + "args": { + "External id": 235133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718782.426, "dur": 6.522, + "args": { + "External id": 235134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718783.527, "dur": 5.332, + "args": { + "External id": 235135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718794.658, "dur": 4.873, + "args": { + "External id": 235136,"Record function id": 0, "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718795.785, "dur": 3.310, + "args": { + "External id": 235137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718796.824, "dur": 1.521, + "args": { + "External id": 235138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718797.128, "dur": 1.142, + "args": { + "External id": 235139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718802.621, "dur": 3.782, + "args": { + "External id": 235140,"Record function id": 0, "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718803.658, "dur": 2.343, + "args": { + "External id": 235141,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718804.116, "dur": 1.468, + "args": { + "External id": 235142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718804.482, "dur": 1.033, + "args": { + "External id": 235143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718809.680, "dur": 4.105, + "args": { + "External id": 235144,"Record function id": 0, "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718810.737, "dur": 2.644, + "args": { + "External id": 235145,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718811.679, "dur": 1.261, + "args": { + "External id": 235146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718812.046, "dur": 0.816, + "args": { + "External id": 235147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718816.940, "dur": 3.917, + "args": { + "External id": 235148,"Record function id": 0, "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718818.084, "dur": 2.336, + "args": { + "External id": 235149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718818.530, "dur": 1.433, + "args": { + "External id": 235150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718818.889, "dur": 0.999, + "args": { + "External id": 235151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718823.926, "dur": 3.885, + "args": { + "External id": 235152,"Record function id": 0, "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718825.218, "dur": 2.160, + "args": { + "External id": 235153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718825.699, "dur": 1.238, + "args": { + "External id": 235154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718826.084, "dur": 0.775, + "args": { + "External id": 235155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718831.022, "dur": 3.484, + "args": { + "External id": 235156,"Record function id": 0, "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718832.084, "dur": 2.003, + "args": { + "External id": 235157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718832.533, "dur": 1.134, + "args": { + "External id": 235158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718832.828, "dur": 0.737, + "args": { + "External id": 235159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718837.985, "dur": 3.523, + "args": { + "External id": 235160,"Record function id": 0, "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718839.041, "dur": 2.050, + "args": { + "External id": 235161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718839.504, "dur": 1.100, + "args": { + "External id": 235162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718839.757, "dur": 0.776, + "args": { + "External id": 235163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718844.794, "dur": 4.101, + "args": { + "External id": 235164,"Record function id": 0, "Ev Idx": 3739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097718845.726, "dur": 2.752, + "args": { + "External id": 235165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718846.552, "dur": 1.470, + "args": { + "External id": 235166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097718846.927, "dur": 1.026, + "args": { + "External id": 235167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097718852.835, "dur": 36757.913, + "args": { + "External id": 235168,"Record function id": 0, "Sequence number": 959119, "Fwd thread id": 1, "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097718854.138, "dur": 36747.982, + "args": { + "External id": 235169,"Sequence number": 959119, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3744 + } + }, + { + "ph": "f", "id": 57, "pid": 2070552, "tid": 2107648, "ts": 5327097718854.138, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5327097718882.053, "dur": 41.983, + "args": { + "External id": 235170,"Record function id": 0, "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5327097718931.315, "dur": 96.852, + "args": { + "External id": 235171,"Record function id": 0, "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5327097719035.718, "dur": 36559.328, + "args": { + "External id": 235172,"Record function id": 0, "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097719126.644, "dur": 7.087, + "args": { + "External id": 235173,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097719147.080, "dur": 5.306, + "args": { + "External id": 235174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097719165.987, "dur": 35614.131, + "args": { + "External id": 235175,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097719178.995, "dur": 35591.981, + "args": { + "External id": 235176,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097719233.047, "dur": 13.925, + "args": { + "External id": 235177,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097719256.232, "dur": 35477.403, + "args": { + "External id": 235178,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097719259.007, "dur": 35473.988, + "args": { + "External id": 235179,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097719262.787, "dur": 5.563, + "args": { + "External id": 235180,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097719270.100, "dur": 35459.636, + "args": { + "External id": 235181,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097754868.507, "dur": 7.938, + "args": { + "External id": 235182,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097754870.854, "dur": 5.306, + "args": { + "External id": 235183,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097754903.881, "dur": 414.082, + "args": { + "External id": 235184,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097754930.021, "dur": 382.770, + "args": { + "External id": 235185,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3760, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097754940.403, "dur": 366.388, + "args": { + "External id": 235186,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097755338.613, "dur": 2.455, + "args": { + "External id": 235187,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3762, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755400.154, "dur": 6.617, + "args": { + "External id": 235188,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755446.944, "dur": 1.811, + "args": { + "External id": 235189,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755463.179, "dur": 1.472, + "args": { + "External id": 235190,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755475.045, "dur": 1.245, + "args": { + "External id": 235191,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755488.172, "dur": 1.479, + "args": { + "External id": 235192,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755499.468, "dur": 1.101, + "args": { + "External id": 235193,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755511.235, "dur": 1.358, + "args": { + "External id": 235194,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755523.417, "dur": 1.258, + "args": { + "External id": 235195,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755534.860, "dur": 1.310, + "args": { + "External id": 235196,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097755659.598, "dur": 2647.282, + "args": { + "External id": 235197,"Record function id": 0, "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5327097755682.000, "dur": 1008.644, + "args": { + "External id": 235198,"Record function id": 0, "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5327097755695.770, "dur": 328.389, + "args": { + "External id": 235199,"Record function id": 0, "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755779.958, "dur": 5.037, + "args": { + "External id": 235200,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755788.513, "dur": 1.314, + "args": { + "External id": 235201,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755791.526, "dur": 1.100, + "args": { + "External id": 235202,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755794.217, "dur": 1.469, + "args": { + "External id": 235203,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755797.331, "dur": 1.199, + "args": { + "External id": 235204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755799.853, "dur": 1.350, + "args": { + "External id": 235205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755803.195, "dur": 1.051, + "args": { + "External id": 235206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755805.857, "dur": 1.318, + "args": { + "External id": 235207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755808.989, "dur": 0.912, + "args": { + "External id": 235208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097755811.290, "dur": 1.146, + "args": { + "External id": 235209,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097755830.385, "dur": 143.730, + "args": { + "External id": 235210,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097755845.579, "dur": 124.090, + "args": { + "External id": 235211,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097755857.982, "dur": 12.448, + "args": { + "External id": 235212,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097755874.552, "dur": 67.369, + "args": { + "External id": 235213,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097755877.183, "dur": 64.351, + "args": { + "External id": 235214,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097755881.234, "dur": 6.195, + "args": { + "External id": 235215,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097755888.993, "dur": 51.977, + "args": { + "External id": 235216,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2070552, "tid": 2107648, + "ts": 5327097756111.809, "dur": 571.172, + "args": { + "External id": 235217,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5327097756129.904, "dur": 539.080, + "args": { + "External id": 235218,"Record function id": 0, "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097756192.858, "dur": 5.767, + "args": { + "External id": 235219,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097756213.667, "dur": 30.012, + "args": { + "External id": 235220,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756217.946, "dur": 1.900, + "args": { + "External id": 235221,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756221.482, "dur": 0.745, + "args": { + "External id": 235222,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756223.858, "dur": 0.928, + "args": { + "External id": 235223,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756225.999, "dur": 0.764, + "args": { + "External id": 235224,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756228.328, "dur": 0.648, + "args": { + "External id": 235225,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756230.321, "dur": 0.748, + "args": { + "External id": 235226,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756232.598, "dur": 0.755, + "args": { + "External id": 235227,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756234.663, "dur": 0.670, + "args": { + "External id": 235228,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756236.676, "dur": 0.845, + "args": { + "External id": 235229,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097756253.402, "dur": 36.238, + "args": { + "External id": 235230,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097756318.338, "dur": 95.948, + "args": { + "External id": 235231,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097756327.781, "dur": 2.934, + "args": { + "External id": 235232,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097756335.801, "dur": 10.262, + "args": { + "External id": 235233,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097756339.523, "dur": 6.105, + "args": { + "External id": 235234,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756342.790, "dur": 1.162, + "args": { + "External id": 235235,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097756352.292, "dur": 25.844, + "args": { + "External id": 235236,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756354.019, "dur": 0.541, + "args": { + "External id": 235237,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756355.982, "dur": 0.870, + "args": { + "External id": 235238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756358.932, "dur": 0.793, + "args": { + "External id": 235239,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756360.619, "dur": 0.700, + "args": { + "External id": 235240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756362.592, "dur": 0.945, + "args": { + "External id": 235241,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756364.685, "dur": 0.683, + "args": { + "External id": 235242,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756366.522, "dur": 0.738, + "args": { + "External id": 235243,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756368.981, "dur": 0.824, + "args": { + "External id": 235244,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097756370.999, "dur": 0.843, + "args": { + "External id": 235245,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097756386.852, "dur": 19.392, + "args": { + "External id": 235246,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097756456.643, "dur": 107.659, + "args": { + "External id": 235247,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097756479.227, "dur": 81.755, + "args": { + "External id": 235248,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3823, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097756488.512, "dur": 68.448, + "args": { + "External id": 235249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097756579.342, "dur": 1.703, + "args": { + "External id": 235250,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3825, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097756698.078, "dur": 1586.061, + "args": { + "External id": 235251,"Sequence number": 959118, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3826 + } + }, + { + "ph": "f", "id": 58, "pid": 2070552, "tid": 2107648, "ts": 5327097756698.078, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097756809.153, "dur": 106.635, + "args": { + "External id": 235252,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097756953.043, "dur": 56.830, + "args": { + "External id": 235253,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757029.437, "dur": 52.135, + "args": { + "External id": 235254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757092.562, "dur": 32.241, + "args": { + "External id": 235255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757131.305, "dur": 45.157, + "args": { + "External id": 235256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757183.036, "dur": 28.405, + "args": { + "External id": 235257,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757218.924, "dur": 42.610, + "args": { + "External id": 235258,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097757283.799, "dur": 22.777, + "args": { + "External id": 235259,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097757325.021, "dur": 26.787, + "args": { + "External id": 235260,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097757371.954, "dur": 18.250, + "args": { + "External id": 235261,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097757402.012, "dur": 15.511, + "args": { + "External id": 235262,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757427.042, "dur": 29.212, + "args": { + "External id": 235263,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757459.132, "dur": 31.943, + "args": { + "External id": 235264,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097757517.444, "dur": 202.296, + "args": { + "External id": 235265,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097757587.110, "dur": 6.281, + "args": { + "External id": 235266,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097757595.399, "dur": 3.608, + "args": { + "External id": 235267,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097757754.644, "dur": 26.751, + "args": { + "External id": 235268,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097757793.553, "dur": 14.675, + "args": { + "External id": 235269,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757816.906, "dur": 40.396, + "args": { + "External id": 235270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757863.270, "dur": 34.353, + "args": { + "External id": 235271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757904.027, "dur": 22.199, + "args": { + "External id": 235272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757930.340, "dur": 29.023, + "args": { + "External id": 235273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097757964.527, "dur": 37.248, + "args": { + "External id": 235274,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097758012.222, "dur": 32.120, + "args": { + "External id": 235275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097758076.217, "dur": 29.996, + "args": { + "External id": 235276,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097758130.864, "dur": 23.688, + "args": { + "External id": 235277,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097758173.461, "dur": 20.189, + "args": { + "External id": 235278,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097758210.172, "dur": 14.214, + "args": { + "External id": 235279,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097758237.347, "dur": 19.020, + "args": { + "External id": 235280,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758328.071, "dur": 14.469, + "args": { + "External id": 235281,"Record function id": 0, "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758331.191, "dur": 10.414, + "args": { + "External id": 235282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758335.267, "dur": 5.481, + "args": { + "External id": 235283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758336.386, "dur": 4.264, + "args": { + "External id": 235284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758346.404, "dur": 4.360, + "args": { + "External id": 235285,"Record function id": 0, "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758347.755, "dur": 2.592, + "args": { + "External id": 235286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758348.557, "dur": 1.343, + "args": { + "External id": 235287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758348.907, "dur": 0.890, + "args": { + "External id": 235288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758354.013, "dur": 3.903, + "args": { + "External id": 235289,"Record function id": 0, "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758355.173, "dur": 2.318, + "args": { + "External id": 235290,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758355.669, "dur": 1.272, + "args": { + "External id": 235291,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758356.089, "dur": 0.761, + "args": { + "External id": 235292,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758361.077, "dur": 5.719, + "args": { + "External id": 235293,"Record function id": 0, "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758362.306, "dur": 4.087, + "args": { + "External id": 235294,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758362.841, "dur": 2.844, + "args": { + "External id": 235295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758364.933, "dur": 0.672, + "args": { + "External id": 235296,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758369.879, "dur": 4.239, + "args": { + "External id": 235297,"Record function id": 0, "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758370.824, "dur": 2.860, + "args": { + "External id": 235298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758371.319, "dur": 1.787, + "args": { + "External id": 235299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758372.007, "dur": 1.028, + "args": { + "External id": 235300,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758377.204, "dur": 4.334, + "args": { + "External id": 235301,"Record function id": 0, "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758378.756, "dur": 2.368, + "args": { + "External id": 235302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758379.286, "dur": 1.396, + "args": { + "External id": 235303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758379.615, "dur": 0.991, + "args": { + "External id": 235304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758384.640, "dur": 4.068, + "args": { + "External id": 235305,"Record function id": 0, "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758385.939, "dur": 2.354, + "args": { + "External id": 235306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758386.649, "dur": 1.210, + "args": { + "External id": 235307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758386.994, "dur": 0.792, + "args": { + "External id": 235308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758391.991, "dur": 3.790, + "args": { + "External id": 235309,"Record function id": 0, "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758393.071, "dur": 2.277, + "args": { + "External id": 235310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758393.579, "dur": 1.339, + "args": { + "External id": 235311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758393.981, "dur": 0.861, + "args": { + "External id": 235312,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758398.845, "dur": 3.855, + "args": { + "External id": 235313,"Record function id": 0, "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097758399.767, "dur": 2.521, + "args": { + "External id": 235314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758400.243, "dur": 1.372, + "args": { + "External id": 235315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097758400.599, "dur": 0.942, + "args": { + "External id": 235316,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097758406.372, "dur": 36422.874, + "args": { + "External id": 235317,"Record function id": 0, "Sequence number": 959117, "Fwd thread id": 1, "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097758407.819, "dur": 36411.677, + "args": { + "External id": 235318,"Sequence number": 959117, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3893 + } + }, + { + "ph": "f", "id": 59, "pid": 2070552, "tid": 2107648, "ts": 5327097758407.819, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5327097758435.389, "dur": 36.988, + "args": { + "External id": 235319,"Record function id": 0, "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5327097758479.458, "dur": 65.323, + "args": { + "External id": 235320,"Record function id": 0, "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5327097758550.565, "dur": 36261.302, + "args": { + "External id": 235321,"Record function id": 0, "Ev Idx": 3896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097758683.766, "dur": 11.495, + "args": { + "External id": 235322,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097758705.388, "dur": 5.716, + "args": { + "External id": 235323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097758725.626, "dur": 35285.261, + "args": { + "External id": 235324,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097758738.510, "dur": 35263.579, + "args": { + "External id": 235325,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097758788.203, "dur": 14.200, + "args": { + "External id": 235326,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097758808.473, "dur": 35145.253, + "args": { + "External id": 235327,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097758810.913, "dur": 35142.102, + "args": { + "External id": 235328,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097758814.413, "dur": 5.529, + "args": { + "External id": 235329,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097758821.664, "dur": 35128.385, + "args": { + "External id": 235330,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097794096.294, "dur": 8.078, + "args": { + "External id": 235331,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097794098.786, "dur": 5.255, + "args": { + "External id": 235332,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097794131.849, "dur": 361.466, + "args": { + "External id": 235333,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097794156.862, "dur": 332.087, + "args": { + "External id": 235334,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3909, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097794167.388, "dur": 316.382, + "args": { + "External id": 235335,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097794511.575, "dur": 2.092, + "args": { + "External id": 235336,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3911, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794568.410, "dur": 6.302, + "args": { + "External id": 235337,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794615.827, "dur": 1.354, + "args": { + "External id": 235338,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794681.200, "dur": 2.212, + "args": { + "External id": 235339,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794696.494, "dur": 1.010, + "args": { + "External id": 235340,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794707.182, "dur": 0.818, + "args": { + "External id": 235341,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794717.939, "dur": 1.128, + "args": { + "External id": 235342,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794729.478, "dur": 0.966, + "args": { + "External id": 235343,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794741.480, "dur": 0.844, + "args": { + "External id": 235344,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097794752.287, "dur": 0.962, + "args": { + "External id": 235345,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097794844.255, "dur": 2611.146, + "args": { + "External id": 235346,"Record function id": 0, "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5327097794862.794, "dur": 981.965, + "args": { + "External id": 235347,"Record function id": 0, "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5327097794876.175, "dur": 320.852, + "args": { + "External id": 235348,"Record function id": 0, "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097794958.333, "dur": 4.350, + "args": { + "External id": 235349,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097794966.114, "dur": 1.148, + "args": { + "External id": 235350,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097794968.917, "dur": 1.354, + "args": { + "External id": 235351,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097794972.094, "dur": 1.214, + "args": { + "External id": 235352,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097794975.161, "dur": 15.837, + "args": { + "External id": 235353,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097794994.794, "dur": 1.503, + "args": { + "External id": 235354,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097794998.239, "dur": 1.069, + "args": { + "External id": 235355,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097795000.844, "dur": 1.247, + "args": { + "External id": 235356,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097795004.114, "dur": 1.138, + "args": { + "External id": 235357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097795006.745, "dur": 1.157, + "args": { + "External id": 235358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097795026.594, "dur": 141.910, + "args": { + "External id": 235359,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097795042.316, "dur": 122.132, + "args": { + "External id": 235360,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097795055.091, "dur": 13.445, + "args": { + "External id": 235361,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097795072.928, "dur": 63.450, + "args": { + "External id": 235362,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097795075.406, "dur": 60.703, + "args": { + "External id": 235363,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795079.103, "dur": 6.001, + "args": { + "External id": 235364,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097795086.607, "dur": 49.042, + "args": { + "External id": 235365,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2070552, "tid": 2107648, + "ts": 5327097795282.969, "dur": 554.418, + "args": { + "External id": 235366,"Record function id": 0, "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5327097795299.187, "dur": 526.519, + "args": { + "External id": 235367,"Record function id": 0, "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097795358.257, "dur": 5.297, + "args": { + "External id": 235368,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097795378.136, "dur": 28.911, + "args": { + "External id": 235369,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795382.216, "dur": 1.549, + "args": { + "External id": 235370,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795386.175, "dur": 0.673, + "args": { + "External id": 235371,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795388.455, "dur": 0.526, + "args": { + "External id": 235372,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795390.249, "dur": 0.827, + "args": { + "External id": 235373,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795392.187, "dur": 0.752, + "args": { + "External id": 235374,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795394.238, "dur": 0.665, + "args": { + "External id": 235375,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795396.474, "dur": 0.855, + "args": { + "External id": 235376,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795398.262, "dur": 0.547, + "args": { + "External id": 235377,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795400.463, "dur": 0.639, + "args": { + "External id": 235378,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097795417.051, "dur": 30.315, + "args": { + "External id": 235379,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097795477.383, "dur": 93.552, + "args": { + "External id": 235380,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097795486.430, "dur": 3.346, + "args": { + "External id": 235381,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097795494.763, "dur": 9.182, + "args": { + "External id": 235382,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097795498.658, "dur": 4.849, + "args": { + "External id": 235383,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795501.689, "dur": 0.481, + "args": { + "External id": 235384,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097795510.246, "dur": 23.281, + "args": { + "External id": 235385,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795511.936, "dur": 0.477, + "args": { + "External id": 235386,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795513.611, "dur": 0.659, + "args": { + "External id": 235387,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795515.292, "dur": 0.690, + "args": { + "External id": 235388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795517.189, "dur": 0.852, + "args": { + "External id": 235389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795519.022, "dur": 0.728, + "args": { + "External id": 235390,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795520.978, "dur": 0.628, + "args": { + "External id": 235391,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795523.039, "dur": 0.602, + "args": { + "External id": 235392,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795525.014, "dur": 0.400, + "args": { + "External id": 235393,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097795526.753, "dur": 0.964, + "args": { + "External id": 235394,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097795543.699, "dur": 19.575, + "args": { + "External id": 235395,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097795610.237, "dur": 147.912, + "args": { + "External id": 235396,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097795668.312, "dur": 86.429, + "args": { + "External id": 235397,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3972, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097795678.292, "dur": 72.217, + "args": { + "External id": 235398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097795774.531, "dur": 1.762, + "args": { + "External id": 235399,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3974, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097795852.080, "dur": 1580.461, + "args": { + "External id": 235400,"Sequence number": 959116, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3975 + } + }, + { + "ph": "f", "id": 60, "pid": 2070552, "tid": 2107648, "ts": 5327097795852.080, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097795961.483, "dur": 121.265, + "args": { + "External id": 235401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097796123.356, "dur": 42.248, + "args": { + "External id": 235402,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796181.302, "dur": 49.413, + "args": { + "External id": 235403,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796240.520, "dur": 32.016, + "args": { + "External id": 235404,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796278.963, "dur": 45.413, + "args": { + "External id": 235405,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796331.267, "dur": 27.459, + "args": { + "External id": 235406,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796365.929, "dur": 43.033, + "args": { + "External id": 235407,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097796428.033, "dur": 21.691, + "args": { + "External id": 235408,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097796466.511, "dur": 28.273, + "args": { + "External id": 235409,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097796513.400, "dur": 19.253, + "args": { + "External id": 235410,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097796544.307, "dur": 14.151, + "args": { + "External id": 235411,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796569.825, "dur": 28.753, + "args": { + "External id": 235412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796601.294, "dur": 84.201, + "args": { + "External id": 235413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097796717.195, "dur": 172.747, + "args": { + "External id": 235414,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097796795.361, "dur": 6.349, + "args": { + "External id": 235415,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097796803.538, "dur": 3.240, + "args": { + "External id": 235416,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097796920.857, "dur": 25.436, + "args": { + "External id": 235417,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097796957.155, "dur": 14.832, + "args": { + "External id": 235418,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097796996.403, "dur": 43.914, + "args": { + "External id": 235419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097797047.405, "dur": 35.943, + "args": { + "External id": 235420,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097797091.084, "dur": 21.948, + "args": { + "External id": 235421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097797117.658, "dur": 29.262, + "args": { + "External id": 235422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097797153.311, "dur": 21.261, + "args": { + "External id": 235423,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097797181.006, "dur": 29.207, + "args": { + "External id": 235424,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097797241.399, "dur": 29.337, + "args": { + "External id": 235425,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097797288.790, "dur": 23.096, + "args": { + "External id": 235426,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097797325.002, "dur": 16.978, + "args": { + "External id": 235427,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097797359.671, "dur": 14.067, + "args": { + "External id": 235428,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097797385.614, "dur": 15.621, + "args": { + "External id": 235429,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797479.716, "dur": 14.554, + "args": { + "External id": 235430,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797483.067, "dur": 10.230, + "args": { + "External id": 235431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797486.887, "dur": 5.484, + "args": { + "External id": 235432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797487.991, "dur": 4.293, + "args": { + "External id": 235433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797497.888, "dur": 4.408, + "args": { + "External id": 235434,"Record function id": 0, "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797499.236, "dur": 2.599, + "args": { + "External id": 235435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797499.997, "dur": 1.400, + "args": { + "External id": 235436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797500.360, "dur": 0.897, + "args": { + "External id": 235437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797505.602, "dur": 4.348, + "args": { + "External id": 235438,"Record function id": 0, "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797506.789, "dur": 2.707, + "args": { + "External id": 235439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797507.467, "dur": 1.481, + "args": { + "External id": 235440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797507.871, "dur": 0.990, + "args": { + "External id": 235441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797513.128, "dur": 4.494, + "args": { + "External id": 235442,"Record function id": 0, "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797514.636, "dur": 2.551, + "args": { + "External id": 235443,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797515.301, "dur": 1.168, + "args": { + "External id": 235444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797515.627, "dur": 0.765, + "args": { + "External id": 235445,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797523.855, "dur": 4.291, + "args": { + "External id": 235446,"Record function id": 0, "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797525.063, "dur": 2.628, + "args": { + "External id": 235447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797525.707, "dur": 1.547, + "args": { + "External id": 235448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797526.207, "dur": 0.977, + "args": { + "External id": 235449,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797531.197, "dur": 4.161, + "args": { + "External id": 235450,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797532.411, "dur": 2.510, + "args": { + "External id": 235451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797533.099, "dur": 1.222, + "args": { + "External id": 235452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797533.375, "dur": 0.840, + "args": { + "External id": 235453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797538.527, "dur": 4.380, + "args": { + "External id": 235454,"Record function id": 0, "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797539.566, "dur": 2.888, + "args": { + "External id": 235455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797540.273, "dur": 1.762, + "args": { + "External id": 235456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797540.711, "dur": 1.250, + "args": { + "External id": 235457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797545.927, "dur": 4.700, + "args": { + "External id": 235458,"Record function id": 0, "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797547.021, "dur": 3.158, + "args": { + "External id": 235459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797547.714, "dur": 1.921, + "args": { + "External id": 235460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797548.167, "dur": 1.404, + "args": { + "External id": 235461,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797553.600, "dur": 4.265, + "args": { + "External id": 235462,"Record function id": 0, "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097797555.148, "dur": 2.292, + "args": { + "External id": 235463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797555.797, "dur": 1.100, + "args": { + "External id": 235464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097797556.147, "dur": 0.682, + "args": { + "External id": 235465,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097797561.721, "dur": 36714.440, + "args": { + "External id": 235466,"Record function id": 0, "Sequence number": 959115, "Fwd thread id": 1, "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097797562.785, "dur": 36704.648, + "args": { + "External id": 235467,"Sequence number": 959115, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4042 + } + }, + { + "ph": "f", "id": 61, "pid": 2070552, "tid": 2107648, "ts": 5327097797562.785, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5327097797590.044, "dur": 73.560, + "args": { + "External id": 235468,"Record function id": 0, "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5327097797673.774, "dur": 71.036, + "args": { + "External id": 235469,"Record function id": 0, "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5327097797751.282, "dur": 36508.565, + "args": { + "External id": 235470,"Record function id": 0, "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097797847.825, "dur": 6.606, + "args": { + "External id": 235471,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097797863.935, "dur": 5.230, + "args": { + "External id": 235472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097797882.446, "dur": 35559.782, + "args": { + "External id": 235473,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097797896.611, "dur": 35536.860, + "args": { + "External id": 235474,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097797948.508, "dur": 14.126, + "args": { + "External id": 235475,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097797968.756, "dur": 35425.804, + "args": { + "External id": 235476,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097797971.122, "dur": 35422.723, + "args": { + "External id": 235477,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097797974.805, "dur": 24.629, + "args": { + "External id": 235478,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097798002.115, "dur": 35388.155, + "args": { + "External id": 235479,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097833535.567, "dur": 8.469, + "args": { + "External id": 235480,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097833538.030, "dur": 5.668, + "args": { + "External id": 235481,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097833571.283, "dur": 395.162, + "args": { + "External id": 235482,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097833595.344, "dur": 366.470, + "args": { + "External id": 235483,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4058, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097833605.188, "dur": 351.179, + "args": { + "External id": 235484,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097833998.120, "dur": 3.255, + "args": { + "External id": 235485,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4060, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834064.369, "dur": 6.706, + "args": { + "External id": 235486,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834113.731, "dur": 1.526, + "args": { + "External id": 235487,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834129.644, "dur": 1.224, + "args": { + "External id": 235488,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834142.848, "dur": 0.919, + "args": { + "External id": 235489,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834154.135, "dur": 0.973, + "args": { + "External id": 235490,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834164.955, "dur": 1.343, + "args": { + "External id": 235491,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834177.773, "dur": 0.914, + "args": { + "External id": 235492,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834189.391, "dur": 1.124, + "args": { + "External id": 235493,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834199.319, "dur": 0.827, + "args": { + "External id": 235494,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097834290.280, "dur": 2628.305, + "args": { + "External id": 235495,"Record function id": 0, "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5327097834309.693, "dur": 992.931, + "args": { + "External id": 235496,"Record function id": 0, "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5327097834324.997, "dur": 293.035, + "args": { + "External id": 235497,"Record function id": 0, "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834406.490, "dur": 3.961, + "args": { + "External id": 235498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834413.643, "dur": 0.797, + "args": { + "External id": 235499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834416.048, "dur": 0.856, + "args": { + "External id": 235500,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834418.352, "dur": 0.924, + "args": { + "External id": 235501,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834420.811, "dur": 0.781, + "args": { + "External id": 235502,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834423.046, "dur": 0.839, + "args": { + "External id": 235503,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834425.644, "dur": 1.115, + "args": { + "External id": 235504,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834428.120, "dur": 0.813, + "args": { + "External id": 235505,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834430.392, "dur": 1.094, + "args": { + "External id": 235506,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097834432.844, "dur": 0.849, + "args": { + "External id": 235507,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097834451.111, "dur": 140.005, + "args": { + "External id": 235508,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097834466.680, "dur": 119.949, + "args": { + "External id": 235509,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097834480.054, "dur": 11.668, + "args": { + "External id": 235510,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097834495.386, "dur": 64.506, + "args": { + "External id": 235511,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097834497.480, "dur": 61.971, + "args": { + "External id": 235512,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834501.219, "dur": 5.461, + "args": { + "External id": 235513,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097834508.233, "dur": 50.594, + "args": { + "External id": 235514,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2070552, "tid": 2107648, + "ts": 5327097834750.377, "dur": 544.328, + "args": { + "External id": 235515,"Record function id": 0, "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5327097834768.231, "dur": 514.595, + "args": { + "External id": 235516,"Record function id": 0, "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097834831.355, "dur": 5.528, + "args": { + "External id": 235517,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097834852.175, "dur": 35.299, + "args": { + "External id": 235518,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834860.153, "dur": 1.500, + "args": { + "External id": 235519,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834863.421, "dur": 0.393, + "args": { + "External id": 235520,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834875.266, "dur": 0.309, + "args": { + "External id": 235521,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834876.244, "dur": 0.281, + "args": { + "External id": 235522,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834877.198, "dur": 0.523, + "args": { + "External id": 235523,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834878.557, "dur": 0.472, + "args": { + "External id": 235524,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834880.054, "dur": 0.200, + "args": { + "External id": 235525,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834881.481, "dur": 0.395, + "args": { + "External id": 235526,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097834882.775, "dur": 0.232, + "args": { + "External id": 235527,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097834897.274, "dur": 36.347, + "args": { + "External id": 235528,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097834962.292, "dur": 109.039, + "args": { + "External id": 235529,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097834972.136, "dur": 3.069, + "args": { + "External id": 235530,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097834996.652, "dur": 10.679, + "args": { + "External id": 235531,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097835000.528, "dur": 6.351, + "args": { + "External id": 235532,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835004.067, "dur": 0.686, + "args": { + "External id": 235533,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097835015.383, "dur": 17.266, + "args": { + "External id": 235534,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835016.838, "dur": 0.869, + "args": { + "External id": 235535,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835018.759, "dur": 0.678, + "args": { + "External id": 235536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835020.206, "dur": 0.371, + "args": { + "External id": 235537,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835021.702, "dur": 0.380, + "args": { + "External id": 235538,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835022.886, "dur": 0.583, + "args": { + "External id": 235539,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835024.381, "dur": 0.307, + "args": { + "External id": 235540,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835025.400, "dur": 0.450, + "args": { + "External id": 235541,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835026.701, "dur": 0.583, + "args": { + "External id": 235542,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097835028.083, "dur": 0.613, + "args": { + "External id": 235543,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097835041.819, "dur": 21.705, + "args": { + "External id": 235544,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097835115.270, "dur": 105.515, + "args": { + "External id": 235545,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097835137.025, "dur": 80.478, + "args": { + "External id": 235546,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4121, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097835145.383, "dur": 67.996, + "args": { + "External id": 235547,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097835233.641, "dur": 1.681, + "args": { + "External id": 235548,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4123, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097835309.437, "dur": 1587.397, + "args": { + "External id": 235549,"Sequence number": 959114, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4124 + } + }, + { + "ph": "f", "id": 62, "pid": 2070552, "tid": 2107648, "ts": 5327097835309.437, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097835422.669, "dur": 101.670, + "args": { + "External id": 235550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097835559.777, "dur": 40.152, + "args": { + "External id": 235551,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097835615.888, "dur": 97.295, + "args": { + "External id": 235552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097835727.919, "dur": 33.167, + "args": { + "External id": 235553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097835766.774, "dur": 44.434, + "args": { + "External id": 235554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097835817.656, "dur": 27.354, + "args": { + "External id": 235555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097835852.476, "dur": 41.289, + "args": { + "External id": 235556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097835915.654, "dur": 24.418, + "args": { + "External id": 235557,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097835956.515, "dur": 42.004, + "args": { + "External id": 235558,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097836022.308, "dur": 21.511, + "args": { + "External id": 235559,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097836055.869, "dur": 15.248, + "args": { + "External id": 235560,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836079.292, "dur": 33.420, + "args": { + "External id": 235561,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836115.773, "dur": 32.773, + "args": { + "External id": 235562,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097836173.567, "dur": 159.286, + "args": { + "External id": 235563,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097836242.648, "dur": 6.146, + "args": { + "External id": 235564,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097836250.471, "dur": 2.529, + "args": { + "External id": 235565,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097836364.003, "dur": 24.708, + "args": { + "External id": 235566,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097836399.046, "dur": 14.608, + "args": { + "External id": 235567,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836421.110, "dur": 39.525, + "args": { + "External id": 235568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836465.559, "dur": 35.064, + "args": { + "External id": 235569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836509.702, "dur": 22.046, + "args": { + "External id": 235570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836535.697, "dur": 30.397, + "args": { + "External id": 235571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836570.994, "dur": 21.186, + "args": { + "External id": 235572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097836598.734, "dur": 64.952, + "args": { + "External id": 235573,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097836684.034, "dur": 28.277, + "args": { + "External id": 235574,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097836728.990, "dur": 39.438, + "args": { + "External id": 235575,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097836789.265, "dur": 20.802, + "args": { + "External id": 235576,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097836824.577, "dur": 14.496, + "args": { + "External id": 235577,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097836849.524, "dur": 20.100, + "args": { + "External id": 235578,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836942.926, "dur": 13.640, + "args": { + "External id": 235579,"Record function id": 0, "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836945.933, "dur": 9.681, + "args": { + "External id": 235580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836949.756, "dur": 5.097, + "args": { + "External id": 235581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836950.881, "dur": 3.881, + "args": { + "External id": 235582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836960.161, "dur": 4.861, + "args": { + "External id": 235583,"Record function id": 0, "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836961.815, "dur": 2.779, + "args": { + "External id": 235584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836962.741, "dur": 1.408, + "args": { + "External id": 235585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836963.071, "dur": 0.975, + "args": { + "External id": 235586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836968.262, "dur": 4.414, + "args": { + "External id": 235587,"Record function id": 0, "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836969.638, "dur": 2.634, + "args": { + "External id": 235588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836970.415, "dur": 1.225, + "args": { + "External id": 235589,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836971.054, "dur": 0.504, + "args": { + "External id": 235590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836975.800, "dur": 21.848, + "args": { + "External id": 235591,"Record function id": 0, "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097836992.931, "dur": 3.998, + "args": { + "External id": 235592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836994.137, "dur": 1.987, + "args": { + "External id": 235593,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097836994.601, "dur": 1.255, + "args": { + "External id": 235594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837002.408, "dur": 4.648, + "args": { + "External id": 235595,"Record function id": 0, "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837003.962, "dur": 2.651, + "args": { + "External id": 235596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837004.505, "dur": 1.425, + "args": { + "External id": 235597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837004.982, "dur": 0.879, + "args": { + "External id": 235598,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837010.127, "dur": 4.150, + "args": { + "External id": 235599,"Record function id": 0, "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837011.378, "dur": 2.485, + "args": { + "External id": 235600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837012.126, "dur": 1.293, + "args": { + "External id": 235601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837012.578, "dur": 0.772, + "args": { + "External id": 235602,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837017.398, "dur": 3.416, + "args": { + "External id": 235603,"Record function id": 0, "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837018.470, "dur": 1.954, + "args": { + "External id": 235604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837018.995, "dur": 0.998, + "args": { + "External id": 235605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837019.327, "dur": 0.589, + "args": { + "External id": 235606,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837023.868, "dur": 3.856, + "args": { + "External id": 235607,"Record function id": 0, "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837025.052, "dur": 2.252, + "args": { + "External id": 235608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837025.718, "dur": 1.064, + "args": { + "External id": 235609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837026.075, "dur": 0.631, + "args": { + "External id": 235610,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837030.950, "dur": 3.857, + "args": { + "External id": 235611,"Record function id": 0, "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097837032.226, "dur": 2.083, + "args": { + "External id": 235612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837032.870, "dur": 1.020, + "args": { + "External id": 235613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097837033.292, "dur": 0.523, + "args": { + "External id": 235614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097837038.816, "dur": 38777.313, + "args": { + "External id": 235615,"Record function id": 0, "Sequence number": 959113, "Fwd thread id": 1, "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097837040.223, "dur": 38766.656, + "args": { + "External id": 235616,"Sequence number": 959113, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4191 + } + }, + { + "ph": "f", "id": 63, "pid": 2070552, "tid": 2107648, "ts": 5327097837040.223, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5327097837068.316, "dur": 39.423, + "args": { + "External id": 235617,"Record function id": 0, "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5327097837115.132, "dur": 64.833, + "args": { + "External id": 235618,"Record function id": 0, "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5327097837185.070, "dur": 38614.114, + "args": { + "External id": 235619,"Record function id": 0, "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097837271.111, "dur": 5.949, + "args": { + "External id": 235620,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097837285.988, "dur": 4.727, + "args": { + "External id": 235621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097837307.272, "dur": 37628.420, + "args": { + "External id": 235622,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097837319.496, "dur": 37608.149, + "args": { + "External id": 235623,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097837364.102, "dur": 13.959, + "args": { + "External id": 235624,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097837383.950, "dur": 37506.337, + "args": { + "External id": 235625,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097837386.223, "dur": 37503.367, + "args": { + "External id": 235626,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097837389.922, "dur": 4.992, + "args": { + "External id": 235627,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097837396.429, "dur": 37489.780, + "args": { + "External id": 235628,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097875033.561, "dur": 8.773, + "args": { + "External id": 235629,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097875036.028, "dur": 5.868, + "args": { + "External id": 235630,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097875069.405, "dur": 410.752, + "args": { + "External id": 235631,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097875091.617, "dur": 383.100, + "args": { + "External id": 235632,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4207, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097875101.083, "dur": 367.561, + "args": { + "External id": 235633,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097875500.280, "dur": 2.152, + "args": { + "External id": 235634,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4209, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875561.741, "dur": 6.520, + "args": { + "External id": 235635,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875610.348, "dur": 1.417, + "args": { + "External id": 235636,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875663.185, "dur": 1.946, + "args": { + "External id": 235637,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875680.545, "dur": 1.163, + "args": { + "External id": 235638,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875692.180, "dur": 0.911, + "args": { + "External id": 235639,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875702.686, "dur": 0.776, + "args": { + "External id": 235640,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875713.478, "dur": 0.916, + "args": { + "External id": 235641,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875724.589, "dur": 0.788, + "args": { + "External id": 235642,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097875737.245, "dur": 1.061, + "args": { + "External id": 235643,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097875830.901, "dur": 2619.432, + "args": { + "External id": 235644,"Record function id": 0, "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5327097875850.692, "dur": 983.028, + "args": { + "External id": 235645,"Record function id": 0, "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5327097875864.284, "dur": 309.999, + "args": { + "External id": 235646,"Record function id": 0, "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875945.719, "dur": 4.301, + "args": { + "External id": 235647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875952.974, "dur": 0.944, + "args": { + "External id": 235648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875955.718, "dur": 0.856, + "args": { + "External id": 235649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875958.127, "dur": 0.700, + "args": { + "External id": 235650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875960.162, "dur": 0.628, + "args": { + "External id": 235651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875961.909, "dur": 0.863, + "args": { + "External id": 235652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875964.079, "dur": 0.846, + "args": { + "External id": 235653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875966.223, "dur": 0.765, + "args": { + "External id": 235654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875968.630, "dur": 0.852, + "args": { + "External id": 235655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097875971.064, "dur": 1.023, + "args": { + "External id": 235656,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097876005.253, "dur": 142.103, + "args": { + "External id": 235657,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097876020.525, "dur": 122.638, + "args": { + "External id": 235658,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097876034.285, "dur": 12.281, + "args": { + "External id": 235659,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097876050.739, "dur": 64.099, + "args": { + "External id": 235660,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097876053.105, "dur": 61.382, + "args": { + "External id": 235661,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876056.531, "dur": 5.628, + "args": { + "External id": 235662,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097876063.698, "dur": 50.212, + "args": { + "External id": 235663,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2070552, "tid": 2107648, + "ts": 5327097876262.173, "dur": 563.924, + "args": { + "External id": 235664,"Record function id": 0, "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5327097876279.434, "dur": 533.724, + "args": { + "External id": 235665,"Record function id": 0, "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097876339.570, "dur": 4.961, + "args": { + "External id": 235666,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097876359.581, "dur": 22.344, + "args": { + "External id": 235667,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876363.505, "dur": 1.538, + "args": { + "External id": 235668,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876366.596, "dur": 0.333, + "args": { + "External id": 235669,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876367.685, "dur": 0.276, + "args": { + "External id": 235670,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876368.881, "dur": 0.242, + "args": { + "External id": 235671,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876369.896, "dur": 0.255, + "args": { + "External id": 235672,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876370.770, "dur": 0.192, + "args": { + "External id": 235673,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876371.586, "dur": 0.186, + "args": { + "External id": 235674,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876372.319, "dur": 0.771, + "args": { + "External id": 235675,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876374.263, "dur": 0.363, + "args": { + "External id": 235676,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097876391.382, "dur": 32.481, + "args": { + "External id": 235677,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097876452.795, "dur": 95.120, + "args": { + "External id": 235678,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097876461.737, "dur": 3.499, + "args": { + "External id": 235679,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097876470.093, "dur": 9.929, + "args": { + "External id": 235680,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097876473.891, "dur": 5.632, + "args": { + "External id": 235681,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876477.040, "dur": 0.769, + "args": { + "External id": 235682,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097876486.102, "dur": 26.345, + "args": { + "External id": 235683,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876488.089, "dur": 0.752, + "args": { + "External id": 235684,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876489.849, "dur": 0.743, + "args": { + "External id": 235685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876491.681, "dur": 0.683, + "args": { + "External id": 235686,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876493.920, "dur": 0.408, + "args": { + "External id": 235687,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876495.566, "dur": 0.959, + "args": { + "External id": 235688,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876498.058, "dur": 0.647, + "args": { + "External id": 235689,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876500.271, "dur": 0.437, + "args": { + "External id": 235690,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876501.909, "dur": 0.426, + "args": { + "External id": 235691,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097876503.938, "dur": 0.719, + "args": { + "External id": 235692,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097876521.553, "dur": 19.498, + "args": { + "External id": 235693,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097876588.474, "dur": 153.564, + "args": { + "External id": 235694,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097876610.799, "dur": 127.315, + "args": { + "External id": 235695,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4270, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097876658.604, "dur": 74.277, + "args": { + "External id": 235696,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097876758.469, "dur": 1.823, + "args": { + "External id": 235697,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4272, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097876840.426, "dur": 1590.132, + "args": { + "External id": 235698,"Sequence number": 959112, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4273 + } + }, + { + "ph": "f", "id": 64, "pid": 2070552, "tid": 2107648, "ts": 5327097876840.426, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097876949.975, "dur": 120.862, + "args": { + "External id": 235699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097877110.543, "dur": 40.710, + "args": { + "External id": 235700,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877168.559, "dur": 48.568, + "args": { + "External id": 235701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877226.745, "dur": 31.931, + "args": { + "External id": 235702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877265.514, "dur": 45.415, + "args": { + "External id": 235703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877316.805, "dur": 27.288, + "args": { + "External id": 235704,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877351.695, "dur": 41.542, + "args": { + "External id": 235705,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097877413.491, "dur": 22.255, + "args": { + "External id": 235706,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097877456.208, "dur": 27.075, + "args": { + "External id": 235707,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097877503.639, "dur": 19.357, + "args": { + "External id": 235708,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097877535.549, "dur": 15.051, + "args": { + "External id": 235709,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877559.104, "dur": 29.290, + "args": { + "External id": 235710,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877591.343, "dur": 80.008, + "args": { + "External id": 235711,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097877703.298, "dur": 163.801, + "args": { + "External id": 235712,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097877776.197, "dur": 6.188, + "args": { + "External id": 235713,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097877784.144, "dur": 3.567, + "args": { + "External id": 235714,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097877898.609, "dur": 26.367, + "args": { + "External id": 235715,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097877936.509, "dur": 14.655, + "args": { + "External id": 235716,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097877959.282, "dur": 57.960, + "args": { + "External id": 235717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097878025.710, "dur": 36.820, + "args": { + "External id": 235718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097878069.621, "dur": 22.270, + "args": { + "External id": 235719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097878097.099, "dur": 29.061, + "args": { + "External id": 235720,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097878136.003, "dur": 21.400, + "args": { + "External id": 235721,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097878163.996, "dur": 31.124, + "args": { + "External id": 235722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097878211.628, "dur": 40.997, + "args": { + "External id": 235723,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097878276.749, "dur": 24.004, + "args": { + "External id": 235724,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097878320.866, "dur": 17.348, + "args": { + "External id": 235725,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097878352.477, "dur": 19.118, + "args": { + "External id": 235726,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097878383.629, "dur": 15.514, + "args": { + "External id": 235727,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878471.475, "dur": 14.485, + "args": { + "External id": 235728,"Record function id": 0, "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878474.496, "dur": 10.560, + "args": { + "External id": 235729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878478.614, "dur": 5.556, + "args": { + "External id": 235730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878479.685, "dur": 4.331, + "args": { + "External id": 235731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878489.583, "dur": 4.032, + "args": { + "External id": 235732,"Record function id": 0, "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878490.785, "dur": 2.378, + "args": { + "External id": 235733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878491.495, "dur": 1.185, + "args": { + "External id": 235734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878491.801, "dur": 0.815, + "args": { + "External id": 235735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878496.749, "dur": 4.387, + "args": { + "External id": 235736,"Record function id": 0, "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878497.984, "dur": 2.745, + "args": { + "External id": 235737,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878498.685, "dur": 1.633, + "args": { + "External id": 235738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878499.217, "dur": 1.009, + "args": { + "External id": 235739,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878504.257, "dur": 3.755, + "args": { + "External id": 235740,"Record function id": 0, "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878505.321, "dur": 2.301, + "args": { + "External id": 235741,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878506.042, "dur": 1.069, + "args": { + "External id": 235742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878506.351, "dur": 0.674, + "args": { + "External id": 235743,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878511.008, "dur": 4.218, + "args": { + "External id": 235744,"Record function id": 0, "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878512.104, "dur": 2.695, + "args": { + "External id": 235745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878512.838, "dur": 1.364, + "args": { + "External id": 235746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878513.208, "dur": 0.920, + "args": { + "External id": 235747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878518.251, "dur": 4.083, + "args": { + "External id": 235748,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878519.481, "dur": 2.450, + "args": { + "External id": 235749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878520.151, "dur": 1.356, + "args": { + "External id": 235750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878520.537, "dur": 0.896, + "args": { + "External id": 235751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878525.442, "dur": 4.224, + "args": { + "External id": 235752,"Record function id": 0, "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878527.003, "dur": 2.251, + "args": { + "External id": 235753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878527.638, "dur": 1.187, + "args": { + "External id": 235754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878527.951, "dur": 0.809, + "args": { + "External id": 235755,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878532.735, "dur": 3.993, + "args": { + "External id": 235756,"Record function id": 0, "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878534.061, "dur": 2.241, + "args": { + "External id": 235757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878534.754, "dur": 1.113, + "args": { + "External id": 235758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878535.126, "dur": 0.671, + "args": { + "External id": 235759,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878542.918, "dur": 3.759, + "args": { + "External id": 235760,"Record function id": 0, "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097878544.100, "dur": 2.173, + "args": { + "External id": 235761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878544.620, "dur": 1.228, + "args": { + "External id": 235762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097878544.976, "dur": 0.804, + "args": { + "External id": 235763,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097878550.404, "dur": 36856.248, + "args": { + "External id": 235764,"Record function id": 0, "Sequence number": 959111, "Fwd thread id": 1, "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097878551.540, "dur": 36846.620, + "args": { + "External id": 235765,"Sequence number": 959111, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4340 + } + }, + { + "ph": "f", "id": 65, "pid": 2070552, "tid": 2107648, "ts": 5327097878551.540, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5327097878578.180, "dur": 76.474, + "args": { + "External id": 235766,"Record function id": 0, "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5327097878665.498, "dur": 70.810, + "args": { + "External id": 235767,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5327097878742.218, "dur": 36648.535, + "args": { + "External id": 235768,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097878833.081, "dur": 6.557, + "args": { + "External id": 235769,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097878849.285, "dur": 5.009, + "args": { + "External id": 235770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097878870.416, "dur": 35742.474, + "args": { + "External id": 235771,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097878883.516, "dur": 35721.472, + "args": { + "External id": 235772,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097878922.379, "dur": 14.187, + "args": { + "External id": 235773,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097878942.518, "dur": 35622.138, + "args": { + "External id": 235774,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097878945.258, "dur": 35618.742, + "args": { + "External id": 235775,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097878949.209, "dur": 5.139, + "args": { + "External id": 235776,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097878955.897, "dur": 35604.654, + "args": { + "External id": 235777,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097914716.271, "dur": 8.235, + "args": { + "External id": 235778,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097914718.838, "dur": 5.171, + "args": { + "External id": 235779,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097914751.225, "dur": 365.694, + "args": { + "External id": 235780,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097914773.794, "dur": 338.377, + "args": { + "External id": 235781,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4356, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097914783.737, "dur": 322.974, + "args": { + "External id": 235782,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097915136.304, "dur": 2.080, + "args": { + "External id": 235783,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4358, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915193.813, "dur": 6.269, + "args": { + "External id": 235784,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915241.609, "dur": 1.413, + "args": { + "External id": 235785,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915258.095, "dur": 1.771, + "args": { + "External id": 235786,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915271.178, "dur": 1.224, + "args": { + "External id": 235787,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915283.523, "dur": 1.129, + "args": { + "External id": 235788,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915295.506, "dur": 1.044, + "args": { + "External id": 235789,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915307.087, "dur": 1.275, + "args": { + "External id": 235790,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915320.014, "dur": 1.651, + "args": { + "External id": 235791,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915331.037, "dur": 1.285, + "args": { + "External id": 235792,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097915419.698, "dur": 2650.483, + "args": { + "External id": 235793,"Record function id": 0, "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5327097915438.949, "dur": 990.802, + "args": { + "External id": 235794,"Record function id": 0, "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5327097915451.366, "dur": 340.638, + "args": { + "External id": 235795,"Record function id": 0, "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915532.871, "dur": 4.180, + "args": { + "External id": 235796,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915539.994, "dur": 1.303, + "args": { + "External id": 235797,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915542.893, "dur": 1.276, + "args": { + "External id": 235798,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915545.821, "dur": 1.656, + "args": { + "External id": 235799,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915548.892, "dur": 1.083, + "args": { + "External id": 235800,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915551.204, "dur": 1.194, + "args": { + "External id": 235801,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915553.913, "dur": 1.350, + "args": { + "External id": 235802,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915556.435, "dur": 1.548, + "args": { + "External id": 235803,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915559.607, "dur": 1.116, + "args": { + "External id": 235804,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097915562.324, "dur": 1.524, + "args": { + "External id": 235805,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097915580.421, "dur": 180.493, + "args": { + "External id": 235806,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097915595.517, "dur": 160.494, + "args": { + "External id": 235807,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097915608.372, "dur": 48.108, + "args": { + "External id": 235808,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097915661.331, "dur": 66.695, + "args": { + "External id": 235809,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097915663.498, "dur": 64.241, + "args": { + "External id": 235810,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915666.953, "dur": 6.703, + "args": { + "External id": 235811,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097915675.305, "dur": 51.878, + "args": { + "External id": 235812,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2070552, "tid": 2107648, + "ts": 5327097915879.424, "dur": 542.601, + "args": { + "External id": 235813,"Record function id": 0, "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5327097915896.203, "dur": 514.170, + "args": { + "External id": 235814,"Record function id": 0, "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097915959.398, "dur": 4.879, + "args": { + "External id": 235815,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097915994.944, "dur": 25.805, + "args": { + "External id": 235816,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097915999.353, "dur": 1.634, + "args": { + "External id": 235817,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916003.036, "dur": 0.324, + "args": { + "External id": 235818,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916004.291, "dur": 0.843, + "args": { + "External id": 235819,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916006.545, "dur": 0.732, + "args": { + "External id": 235820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916008.156, "dur": 1.244, + "args": { + "External id": 235821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916010.591, "dur": 0.673, + "args": { + "External id": 235822,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916012.740, "dur": 0.432, + "args": { + "External id": 235823,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916014.487, "dur": 0.819, + "args": { + "External id": 235824,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916016.322, "dur": 0.671, + "args": { + "External id": 235825,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097916033.475, "dur": 34.805, + "args": { + "External id": 235826,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097916099.271, "dur": 99.076, + "args": { + "External id": 235827,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097916109.715, "dur": 4.417, + "args": { + "External id": 235828,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097916118.736, "dur": 9.886, + "args": { + "External id": 235829,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097916122.531, "dur": 5.661, + "args": { + "External id": 235830,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916125.914, "dur": 0.582, + "args": { + "External id": 235831,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097916135.630, "dur": 26.638, + "args": { + "External id": 235832,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916137.300, "dur": 1.106, + "args": { + "External id": 235833,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916139.385, "dur": 0.768, + "args": { + "External id": 235834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916141.568, "dur": 0.444, + "args": { + "External id": 235835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916143.275, "dur": 0.731, + "args": { + "External id": 235836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916145.122, "dur": 0.645, + "args": { + "External id": 235837,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916146.900, "dur": 0.397, + "args": { + "External id": 235838,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916149.907, "dur": 0.474, + "args": { + "External id": 235839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916151.708, "dur": 1.513, + "args": { + "External id": 235840,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097916154.061, "dur": 0.720, + "args": { + "External id": 235841,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097916171.845, "dur": 18.887, + "args": { + "External id": 235842,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097916242.936, "dur": 106.015, + "args": { + "External id": 235843,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097916264.506, "dur": 81.210, + "args": { + "External id": 235844,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4419, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097916272.881, "dur": 68.992, + "args": { + "External id": 235845,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097916362.396, "dur": 1.623, + "args": { + "External id": 235846,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4421, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097916436.298, "dur": 1613.030, + "args": { + "External id": 235847,"Sequence number": 959110, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4422 + } + }, + { + "ph": "f", "id": 66, "pid": 2070552, "tid": 2107648, "ts": 5327097916436.298, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097916542.418, "dur": 145.723, + "args": { + "External id": 235848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097916726.821, "dur": 41.595, + "args": { + "External id": 235849,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097916786.160, "dur": 52.469, + "args": { + "External id": 235850,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097916848.702, "dur": 32.184, + "args": { + "External id": 235851,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097916887.677, "dur": 44.728, + "args": { + "External id": 235852,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097916939.485, "dur": 27.210, + "args": { + "External id": 235853,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097916974.597, "dur": 61.768, + "args": { + "External id": 235854,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097917060.212, "dur": 23.854, + "args": { + "External id": 235855,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097917100.551, "dur": 25.403, + "args": { + "External id": 235856,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097917144.855, "dur": 20.270, + "args": { + "External id": 235857,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097917177.853, "dur": 15.444, + "args": { + "External id": 235858,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917202.781, "dur": 29.875, + "args": { + "External id": 235859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917235.970, "dur": 31.902, + "args": { + "External id": 235860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097917294.737, "dur": 165.534, + "args": { + "External id": 235861,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097917369.047, "dur": 5.861, + "args": { + "External id": 235862,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097917376.967, "dur": 3.212, + "args": { + "External id": 235863,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097917490.258, "dur": 24.202, + "args": { + "External id": 235864,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097917525.612, "dur": 14.474, + "args": { + "External id": 235865,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917547.071, "dur": 37.246, + "args": { + "External id": 235866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917590.638, "dur": 71.413, + "args": { + "External id": 235867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917672.467, "dur": 26.627, + "args": { + "External id": 235868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917704.679, "dur": 30.054, + "args": { + "External id": 235869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917740.449, "dur": 21.601, + "args": { + "External id": 235870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097917768.781, "dur": 29.188, + "args": { + "External id": 235871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097917814.545, "dur": 26.406, + "args": { + "External id": 235872,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097917856.760, "dur": 22.832, + "args": { + "External id": 235873,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097917906.872, "dur": 26.096, + "args": { + "External id": 235874,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097917953.945, "dur": 15.284, + "args": { + "External id": 235875,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097917996.347, "dur": 19.218, + "args": { + "External id": 235876,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918091.937, "dur": 18.527, + "args": { + "External id": 235877,"Record function id": 0, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918095.066, "dur": 14.385, + "args": { + "External id": 235878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918103.021, "dur": 5.594, + "args": { + "External id": 235879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918104.351, "dur": 4.127, + "args": { + "External id": 235880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918114.115, "dur": 4.800, + "args": { + "External id": 235881,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918115.460, "dur": 3.023, + "args": { + "External id": 235882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918116.123, "dur": 1.612, + "args": { + "External id": 235883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918116.424, "dur": 1.233, + "args": { + "External id": 235884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918122.348, "dur": 3.764, + "args": { + "External id": 235885,"Record function id": 0, "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918123.458, "dur": 2.232, + "args": { + "External id": 235886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918123.944, "dur": 1.283, + "args": { + "External id": 235887,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918124.341, "dur": 0.796, + "args": { + "External id": 235888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918129.275, "dur": 3.490, + "args": { + "External id": 235889,"Record function id": 0, "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918130.361, "dur": 1.948, + "args": { + "External id": 235890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918130.843, "dur": 1.031, + "args": { + "External id": 235891,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918131.131, "dur": 0.668, + "args": { + "External id": 235892,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918135.882, "dur": 3.796, + "args": { + "External id": 235893,"Record function id": 0, "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918136.876, "dur": 2.391, + "args": { + "External id": 235894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918137.355, "dur": 1.361, + "args": { + "External id": 235895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918137.745, "dur": 0.903, + "args": { + "External id": 235896,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918142.803, "dur": 4.027, + "args": { + "External id": 235897,"Record function id": 0, "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918143.712, "dur": 2.696, + "args": { + "External id": 235898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918144.223, "dur": 1.507, + "args": { + "External id": 235899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918144.819, "dur": 0.846, + "args": { + "External id": 235900,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918149.922, "dur": 3.696, + "args": { + "External id": 235901,"Record function id": 0, "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918151.066, "dur": 2.141, + "args": { + "External id": 235902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918151.555, "dur": 1.100, + "args": { + "External id": 235903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918151.882, "dur": 0.697, + "args": { + "External id": 235904,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918156.648, "dur": 3.709, + "args": { + "External id": 235905,"Record function id": 0, "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918157.667, "dur": 2.275, + "args": { + "External id": 235906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918158.119, "dur": 1.257, + "args": { + "External id": 235907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918158.402, "dur": 0.898, + "args": { + "External id": 235908,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918163.387, "dur": 3.795, + "args": { + "External id": 235909,"Record function id": 0, "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097918164.355, "dur": 2.416, + "args": { + "External id": 235910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918164.805, "dur": 1.330, + "args": { + "External id": 235911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097918165.108, "dur": 0.962, + "args": { + "External id": 235912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097918170.890, "dur": 36741.206, + "args": { + "External id": 235913,"Record function id": 0, "Sequence number": 959109, "Fwd thread id": 1, "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097918172.253, "dur": 36730.533, + "args": { + "External id": 235914,"Sequence number": 959109, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4489 + } + }, + { + "ph": "f", "id": 67, "pid": 2070552, "tid": 2107648, "ts": 5327097918172.253, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5327097918199.120, "dur": 43.043, + "args": { + "External id": 235915,"Record function id": 0, "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5327097918250.436, "dur": 70.203, + "args": { + "External id": 235916,"Record function id": 0, "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5327097918327.069, "dur": 36567.756, + "args": { + "External id": 235917,"Record function id": 0, "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097918419.917, "dur": 6.283, + "args": { + "External id": 235918,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097918434.972, "dur": 4.620, + "args": { + "External id": 235919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097918453.321, "dur": 35630.194, + "args": { + "External id": 235920,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097918466.463, "dur": 35608.383, + "args": { + "External id": 235921,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097918504.209, "dur": 14.049, + "args": { + "External id": 235922,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097918524.272, "dur": 35514.922, + "args": { + "External id": 235923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097918526.704, "dur": 35511.683, + "args": { + "External id": 235924,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097918530.113, "dur": 5.928, + "args": { + "External id": 235925,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097918537.710, "dur": 35497.358, + "args": { + "External id": 235926,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097954170.031, "dur": 9.581, + "args": { + "External id": 235927,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097954172.647, "dur": 6.654, + "args": { + "External id": 235928,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097954207.148, "dur": 372.889, + "args": { + "External id": 235929,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097954231.405, "dur": 343.787, + "args": { + "External id": 235930,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4505, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097954241.096, "dur": 329.193, + "args": { + "External id": 235931,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097954599.653, "dur": 2.001, + "args": { + "External id": 235932,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4507, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954695.053, "dur": 6.922, + "args": { + "External id": 235933,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954744.918, "dur": 1.346, + "args": { + "External id": 235934,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954761.962, "dur": 1.606, + "args": { + "External id": 235935,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954776.079, "dur": 1.317, + "args": { + "External id": 235936,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954788.810, "dur": 1.281, + "args": { + "External id": 235937,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954799.594, "dur": 0.876, + "args": { + "External id": 235938,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954810.836, "dur": 1.324, + "args": { + "External id": 235939,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954822.557, "dur": 1.357, + "args": { + "External id": 235940,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097954833.999, "dur": 1.065, + "args": { + "External id": 235941,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097954927.036, "dur": 2688.477, + "args": { + "External id": 235942,"Record function id": 0, "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5327097954947.058, "dur": 1021.375, + "args": { + "External id": 235943,"Record function id": 0, "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5327097954961.920, "dur": 329.066, + "args": { + "External id": 235944,"Record function id": 0, "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955065.950, "dur": 5.082, + "args": { + "External id": 235945,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955074.465, "dur": 1.961, + "args": { + "External id": 235946,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955078.265, "dur": 1.649, + "args": { + "External id": 235947,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955081.654, "dur": 1.951, + "args": { + "External id": 235948,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955085.141, "dur": 1.769, + "args": { + "External id": 235949,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955088.174, "dur": 1.487, + "args": { + "External id": 235950,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955091.164, "dur": 1.518, + "args": { + "External id": 235951,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955093.870, "dur": 1.100, + "args": { + "External id": 235952,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955096.303, "dur": 1.735, + "args": { + "External id": 235953,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097955099.452, "dur": 1.302, + "args": { + "External id": 235954,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097955118.052, "dur": 144.968, + "args": { + "External id": 235955,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097955133.194, "dur": 125.134, + "args": { + "External id": 235956,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097955146.290, "dur": 12.894, + "args": { + "External id": 235957,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097955163.034, "dur": 66.165, + "args": { + "External id": 235958,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097955165.446, "dur": 63.401, + "args": { + "External id": 235959,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955169.006, "dur": 5.703, + "args": { + "External id": 235960,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097955176.478, "dur": 51.871, + "args": { + "External id": 235961,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2070552, "tid": 2107648, + "ts": 5327097955379.317, "dur": 580.492, + "args": { + "External id": 235962,"Record function id": 0, "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5327097955396.550, "dur": 551.111, + "args": { + "External id": 235963,"Record function id": 0, "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097955460.216, "dur": 4.892, + "args": { + "External id": 235964,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097955480.585, "dur": 23.937, + "args": { + "External id": 235965,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955484.686, "dur": 1.421, + "args": { + "External id": 235966,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955487.233, "dur": 0.305, + "args": { + "External id": 235967,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955488.778, "dur": 0.315, + "args": { + "External id": 235968,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955490.613, "dur": 0.486, + "args": { + "External id": 235969,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955492.294, "dur": 0.708, + "args": { + "External id": 235970,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955494.122, "dur": 0.856, + "args": { + "External id": 235971,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955495.969, "dur": 0.896, + "args": { + "External id": 235972,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955498.337, "dur": 0.363, + "args": { + "External id": 235973,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955500.213, "dur": 0.647, + "args": { + "External id": 235974,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097955514.740, "dur": 30.780, + "args": { + "External id": 235975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5327097955573.769, "dur": 158.160, + "args": { + "External id": 235976,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097955582.787, "dur": 3.307, + "args": { + "External id": 235977,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5327097955591.115, "dur": 9.675, + "args": { + "External id": 235978,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5327097955594.815, "dur": 5.569, + "args": { + "External id": 235979,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955597.723, "dur": 0.861, + "args": { + "External id": 235980,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5327097955607.248, "dur": 76.943, + "args": { + "External id": 235981,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955609.008, "dur": 0.361, + "args": { + "External id": 235982,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955610.851, "dur": 0.568, + "args": { + "External id": 235983,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955612.417, "dur": 0.616, + "args": { + "External id": 235984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955614.241, "dur": 0.779, + "args": { + "External id": 235985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955616.077, "dur": 0.575, + "args": { + "External id": 235986,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955617.367, "dur": 0.828, + "args": { + "External id": 235987,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955669.606, "dur": 0.823, + "args": { + "External id": 235988,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955671.996, "dur": 0.803, + "args": { + "External id": 235989,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097955674.127, "dur": 0.368, + "args": { + "External id": 235990,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097955699.416, "dur": 24.449, + "args": { + "External id": 235991,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097955776.877, "dur": 106.415, + "args": { + "External id": 235992,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097955797.894, "dur": 82.302, + "args": { + "External id": 235993,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4568, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097955806.295, "dur": 69.775, + "args": { + "External id": 235994,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097955897.583, "dur": 1.780, + "args": { + "External id": 235995,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4570, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097955990.943, "dur": 1601.363, + "args": { + "External id": 235996,"Sequence number": 959108, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4571 + } + }, + { + "ph": "f", "id": 68, "pid": 2070552, "tid": 2107648, "ts": 5327097955990.943, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956108.868, "dur": 105.262, + "args": { + "External id": 235997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097956251.401, "dur": 40.929, + "args": { + "External id": 235998,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956308.691, "dur": 48.458, + "args": { + "External id": 235999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956367.182, "dur": 32.020, + "args": { + "External id": 236000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956405.286, "dur": 45.893, + "args": { + "External id": 236001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956457.636, "dur": 27.697, + "args": { + "External id": 236002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956492.100, "dur": 42.742, + "args": { + "External id": 236003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097956556.537, "dur": 21.113, + "args": { + "External id": 236004,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097956594.737, "dur": 78.236, + "args": { + "External id": 236005,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097956698.748, "dur": 22.239, + "args": { + "External id": 236006,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097956735.355, "dur": 16.486, + "args": { + "External id": 236007,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956760.469, "dur": 34.881, + "args": { + "External id": 236008,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097956798.547, "dur": 35.470, + "args": { + "External id": 236009,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097956862.645, "dur": 182.171, + "args": { + "External id": 236010,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097956935.886, "dur": 5.814, + "args": { + "External id": 236011,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097956943.405, "dur": 3.855, + "args": { + "External id": 236012,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097957079.788, "dur": 26.733, + "args": { + "External id": 236013,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097957117.747, "dur": 14.167, + "args": { + "External id": 236014,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097957140.582, "dur": 41.984, + "args": { + "External id": 236015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097957188.265, "dur": 39.350, + "args": { + "External id": 236016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097957234.230, "dur": 21.492, + "args": { + "External id": 236017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097957261.208, "dur": 33.800, + "args": { + "External id": 236018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097957300.592, "dur": 21.081, + "args": { + "External id": 236019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097957328.726, "dur": 50.804, + "args": { + "External id": 236020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097957403.592, "dur": 28.141, + "args": { + "External id": 236021,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097957450.735, "dur": 24.294, + "args": { + "External id": 236022,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097957489.658, "dur": 17.391, + "args": { + "External id": 236023,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097957521.000, "dur": 15.788, + "args": { + "External id": 236024,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097957548.597, "dur": 15.049, + "args": { + "External id": 236025,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957673.691, "dur": 16.791, + "args": { + "External id": 236026,"Record function id": 0, "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957677.338, "dur": 11.796, + "args": { + "External id": 236027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957681.956, "dur": 5.892, + "args": { + "External id": 236028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957683.074, "dur": 4.569, + "args": { + "External id": 236029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957694.367, "dur": 14.070, + "args": { + "External id": 236030,"Record function id": 0, "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957695.517, "dur": 12.434, + "args": { + "External id": 236031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957696.425, "dur": 11.045, + "args": { + "External id": 236032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957706.115, "dur": 1.290, + "args": { + "External id": 236033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957711.784, "dur": 3.992, + "args": { + "External id": 236034,"Record function id": 0, "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957713.069, "dur": 2.278, + "args": { + "External id": 236035,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957713.659, "dur": 1.245, + "args": { + "External id": 236036,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957714.016, "dur": 0.797, + "args": { + "External id": 236037,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957718.943, "dur": 4.765, + "args": { + "External id": 236038,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957720.217, "dur": 3.061, + "args": { + "External id": 236039,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957721.247, "dur": 1.625, + "args": { + "External id": 236040,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957721.763, "dur": 1.020, + "args": { + "External id": 236041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957726.783, "dur": 4.236, + "args": { + "External id": 236042,"Record function id": 0, "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957727.972, "dur": 2.631, + "args": { + "External id": 236043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957728.461, "dur": 1.666, + "args": { + "External id": 236044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957729.113, "dur": 0.941, + "args": { + "External id": 236045,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957734.083, "dur": 4.422, + "args": { + "External id": 236046,"Record function id": 0, "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957735.113, "dur": 2.969, + "args": { + "External id": 236047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957735.596, "dur": 1.677, + "args": { + "External id": 236048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957736.144, "dur": 1.055, + "args": { + "External id": 236049,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957741.693, "dur": 4.046, + "args": { + "External id": 236050,"Record function id": 0, "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957742.719, "dur": 2.569, + "args": { + "External id": 236051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957743.449, "dur": 1.142, + "args": { + "External id": 236052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957743.787, "dur": 0.726, + "args": { + "External id": 236053,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957748.775, "dur": 4.733, + "args": { + "External id": 236054,"Record function id": 0, "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957749.927, "dur": 3.171, + "args": { + "External id": 236055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957750.844, "dur": 1.553, + "args": { + "External id": 236056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957751.555, "dur": 0.767, + "args": { + "External id": 236057,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957756.521, "dur": 4.009, + "args": { + "External id": 236058,"Record function id": 0, "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097957757.411, "dur": 2.732, + "args": { + "External id": 236059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957758.236, "dur": 1.508, + "args": { + "External id": 236060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097957758.568, "dur": 1.101, + "args": { + "External id": 236061,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097957764.264, "dur": 37975.408, + "args": { + "External id": 236062,"Record function id": 0, "Sequence number": 959107, "Fwd thread id": 1, "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097957765.950, "dur": 37964.534, + "args": { + "External id": 236063,"Sequence number": 959107, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4638 + } + }, + { + "ph": "f", "id": 69, "pid": 2070552, "tid": 2107648, "ts": 5327097957765.950, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5327097957798.100, "dur": 39.062, + "args": { + "External id": 236064,"Record function id": 0, "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5327097957844.769, "dur": 77.099, + "args": { + "External id": 236065,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5327097957928.348, "dur": 37794.344, + "args": { + "External id": 236066,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097958032.038, "dur": 7.360, + "args": { + "External id": 236067,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097958049.855, "dur": 5.161, + "args": { + "External id": 236068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097958069.908, "dur": 36766.782, + "args": { + "External id": 236069,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097958083.729, "dur": 36745.220, + "args": { + "External id": 236070,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097958131.841, "dur": 14.400, + "args": { + "External id": 236071,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097958152.039, "dur": 36639.033, + "args": { + "External id": 236072,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097958154.375, "dur": 36636.040, + "args": { + "External id": 236073,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097958158.171, "dur": 5.190, + "args": { + "External id": 236074,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097958165.073, "dur": 36621.791, + "args": { + "External id": 236075,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097994922.615, "dur": 8.817, + "args": { + "External id": 236076,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097994925.340, "dur": 5.747, + "args": { + "External id": 236077,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327097994958.507, "dur": 433.052, + "args": { + "External id": 236078,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097994995.476, "dur": 391.135, + "args": { + "External id": 236079,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4654, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327097995006.527, "dur": 374.354, + "args": { + "External id": 236080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327097995413.037, "dur": 2.033, + "args": { + "External id": 236081,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4656, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995479.329, "dur": 6.291, + "args": { + "External id": 236082,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995527.858, "dur": 1.607, + "args": { + "External id": 236083,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995544.942, "dur": 1.832, + "args": { + "External id": 236084,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995558.758, "dur": 1.303, + "args": { + "External id": 236085,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995571.940, "dur": 1.647, + "args": { + "External id": 236086,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995583.813, "dur": 1.180, + "args": { + "External id": 236087,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995596.108, "dur": 1.504, + "args": { + "External id": 236088,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995608.591, "dur": 1.572, + "args": { + "External id": 236089,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995651.349, "dur": 2.474, + "args": { + "External id": 236090,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097995754.227, "dur": 2075.771, + "args": { + "External id": 236091,"Record function id": 0, "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5327097995773.976, "dur": 449.607, + "args": { + "External id": 236092,"Record function id": 0, "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5327097995787.141, "dur": 335.960, + "args": { + "External id": 236093,"Record function id": 0, "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995873.975, "dur": 4.522, + "args": { + "External id": 236094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995881.742, "dur": 1.292, + "args": { + "External id": 236095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995885.082, "dur": 1.660, + "args": { + "External id": 236096,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995890.931, "dur": 1.740, + "args": { + "External id": 236097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995894.198, "dur": 1.401, + "args": { + "External id": 236098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995897.187, "dur": 1.701, + "args": { + "External id": 236099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995900.423, "dur": 1.125, + "args": { + "External id": 236100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995902.902, "dur": 1.958, + "args": { + "External id": 236101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995906.851, "dur": 1.116, + "args": { + "External id": 236102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097995909.555, "dur": 2.152, + "args": { + "External id": 236103,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097995929.011, "dur": 162.272, + "args": { + "External id": 236104,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097995944.541, "dur": 141.884, + "args": { + "External id": 236105,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097995958.088, "dur": 12.242, + "args": { + "External id": 236106,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097995973.998, "dur": 83.372, + "args": { + "External id": 236107,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097995991.179, "dur": 65.951, + "args": { + "External id": 236108,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097995995.781, "dur": 6.256, + "args": { + "External id": 236109,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097996003.892, "dur": 52.811, + "args": { + "External id": 236110,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097996231.248, "dur": 1575.012, + "args": { + "External id": 236111,"Sequence number": 959106, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4686 + } + }, + { + "ph": "f", "id": 70, "pid": 2070552, "tid": 2107648, "ts": 5327097996231.248, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097996335.592, "dur": 102.263, + "args": { + "External id": 236112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097996475.613, "dur": 39.451, + "args": { + "External id": 236113,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5327097996531.752, "dur": 47.395, + "args": { + "External id": 236114,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097996588.217, "dur": 71.672, + "args": { + "External id": 236115,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097996670.789, "dur": 51.013, + "args": { + "External id": 236116,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097996728.429, "dur": 27.416, + "args": { + "External id": 236117,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097996762.837, "dur": 41.298, + "args": { + "External id": 236118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097996827.679, "dur": 23.896, + "args": { + "External id": 236119,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5327097996869.601, "dur": 27.156, + "args": { + "External id": 236120,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097996915.696, "dur": 19.847, + "args": { + "External id": 236121,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097996948.785, "dur": 15.235, + "args": { + "External id": 236122,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097996971.916, "dur": 45.893, + "args": { + "External id": 236123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097997022.833, "dur": 34.039, + "args": { + "External id": 236124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5327097997085.179, "dur": 168.290, + "args": { + "External id": 236125,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097997161.295, "dur": 6.485, + "args": { + "External id": 236126,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097997169.839, "dur": 2.912, + "args": { + "External id": 236127,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097997287.460, "dur": 26.364, + "args": { + "External id": 236128,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5327097997324.069, "dur": 14.831, + "args": { + "External id": 236129,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097997346.466, "dur": 34.592, + "args": { + "External id": 236130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097997387.079, "dur": 33.217, + "args": { + "External id": 236131,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097997427.061, "dur": 22.021, + "args": { + "External id": 236132,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097997454.402, "dur": 29.638, + "args": { + "External id": 236133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097997490.954, "dur": 20.866, + "args": { + "External id": 236134,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5327097997519.214, "dur": 29.831, + "args": { + "External id": 236135,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5327097997563.867, "dur": 22.631, + "args": { + "External id": 236136,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097997603.507, "dur": 80.323, + "args": { + "External id": 236137,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5327097997702.027, "dur": 19.003, + "args": { + "External id": 236138,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5327097997736.358, "dur": 14.425, + "args": { + "External id": 236139,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5327097997763.445, "dur": 15.823, + "args": { + "External id": 236140,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997851.489, "dur": 14.693, + "args": { + "External id": 236141,"Record function id": 0, "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997854.696, "dur": 10.464, + "args": { + "External id": 236142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997858.865, "dur": 5.484, + "args": { + "External id": 236143,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997860.189, "dur": 4.066, + "args": { + "External id": 236144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997870.021, "dur": 5.021, + "args": { + "External id": 236145,"Record function id": 0, "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997871.373, "dur": 3.197, + "args": { + "External id": 236146,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997872.037, "dur": 2.005, + "args": { + "External id": 236147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997872.613, "dur": 1.351, + "args": { + "External id": 236148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997878.317, "dur": 4.867, + "args": { + "External id": 236149,"Record function id": 0, "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997879.930, "dur": 2.823, + "args": { + "External id": 236150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997880.643, "dur": 1.426, + "args": { + "External id": 236151,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997881.111, "dur": 0.871, + "args": { + "External id": 236152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997886.333, "dur": 4.771, + "args": { + "External id": 236153,"Record function id": 0, "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997887.613, "dur": 3.063, + "args": { + "External id": 236154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997888.533, "dur": 1.576, + "args": { + "External id": 236155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997888.847, "dur": 1.196, + "args": { + "External id": 236156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997894.152, "dur": 4.601, + "args": { + "External id": 236157,"Record function id": 0, "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997895.393, "dur": 2.955, + "args": { + "External id": 236158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997896.153, "dur": 1.516, + "args": { + "External id": 236159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997896.455, "dur": 1.139, + "args": { + "External id": 236160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997902.011, "dur": 4.303, + "args": { + "External id": 236161,"Record function id": 0, "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997903.515, "dur": 2.401, + "args": { + "External id": 236162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997903.995, "dur": 1.496, + "args": { + "External id": 236163,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997904.434, "dur": 0.983, + "args": { + "External id": 236164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997909.403, "dur": 5.085, + "args": { + "External id": 236165,"Record function id": 0, "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997910.829, "dur": 3.204, + "args": { + "External id": 236166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997911.907, "dur": 1.386, + "args": { + "External id": 236167,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997912.471, "dur": 0.756, + "args": { + "External id": 236168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997917.687, "dur": 4.389, + "args": { + "External id": 236169,"Record function id": 0, "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997919.199, "dur": 2.460, + "args": { + "External id": 236170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997919.721, "dur": 1.487, + "args": { + "External id": 236171,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997920.101, "dur": 1.032, + "args": { + "External id": 236172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997925.214, "dur": 4.737, + "args": { + "External id": 236173,"Record function id": 0, "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327097997926.833, "dur": 2.684, + "args": { + "External id": 236174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997927.274, "dur": 1.493, + "args": { + "External id": 236175,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327097997927.856, "dur": 0.831, + "args": { + "External id": 236176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097997933.752, "dur": 37540.603, + "args": { + "External id": 236177,"Record function id": 0, "Sequence number": 959105, "Fwd thread id": 1, "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327097997934.884, "dur": 37530.777, + "args": { + "External id": 236178,"Sequence number": 959105, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4753 + } + }, + { + "ph": "f", "id": 71, "pid": 2070552, "tid": 2107648, "ts": 5327097997934.884, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5327097997961.595, "dur": 57.806, + "args": { + "External id": 236179,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5327097998031.277, "dur": 78.942, + "args": { + "External id": 236180,"Record function id": 0, "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5327097998116.317, "dur": 37342.140, + "args": { + "External id": 236181,"Record function id": 0, "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097998214.639, "dur": 8.621, + "args": { + "External id": 236182,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327097998233.557, "dur": 5.221, + "args": { + "External id": 236183,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097998253.420, "dur": 36487.197, + "args": { + "External id": 236184,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327097998266.459, "dur": 36465.626, + "args": { + "External id": 236185,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327097998316.132, "dur": 14.000, + "args": { + "External id": 236186,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327097998335.950, "dur": 36355.722, + "args": { + "External id": 236187,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327097998338.369, "dur": 36352.668, + "args": { + "External id": 236188,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327097998342.212, "dur": 4.977, + "args": { + "External id": 236189,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327097998348.647, "dur": 36339.467, + "args": { + "External id": 236190,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327098034825.221, "dur": 9.170, + "args": { + "External id": 236191,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327098034827.822, "dur": 6.259, + "args": { + "External id": 236192,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327098034860.309, "dur": 311.466, + "args": { + "External id": 236193,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327098034883.488, "dur": 283.650, + "args": { + "External id": 236194,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4769, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327098034893.584, "dur": 267.807, + "args": { + "External id": 236195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327098035192.069, "dur": 2.123, + "args": { + "External id": 236196,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4771, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035251.363, "dur": 6.205, + "args": { + "External id": 236197,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035299.387, "dur": 1.292, + "args": { + "External id": 236198,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035316.769, "dur": 1.466, + "args": { + "External id": 236199,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035331.457, "dur": 1.055, + "args": { + "External id": 236200,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035345.166, "dur": 1.213, + "args": { + "External id": 236201,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035357.093, "dur": 1.305, + "args": { + "External id": 236202,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035368.991, "dur": 1.574, + "args": { + "External id": 236203,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035382.620, "dur": 1.604, + "args": { + "External id": 236204,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098035395.540, "dur": 1.400, + "args": { + "External id": 236205,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327098035488.219, "dur": 325.875, + "args": { + "External id": 236206,"Record function id": 0, "Sequence number": 959104, "Fwd thread id": 1, "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5327098035490.835, "dur": 314.170, + "args": { + "External id": 236207,"Sequence number": 959104, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4782 + } + }, + { + "ph": "f", "id": 72, "pid": 2070552, "tid": 2107648, "ts": 5327098035490.835, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2070552, "tid": 2107648, + "ts": 5327098035610.264, "dur": 88.168, + "args": { + "External id": 236208,"kernel_hash": "cwtajazwhbnqqeu43trk7x5hwtkpkc7brvmwcfo3dd47qg6wqw5o", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/wt/cwtajazwhbnqqeu43trk7x5hwtkpkc7brvmwcfo3dd47qg6wqw5o.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2070552, "tid": 2107648, + "ts": 5327098035718.748, "dur": 29.458, + "args": { + "External id": 236209,"kernel_hash": "cbqt3vkbsukl3ofnzpcsscs2yqya6hwss2ivwbwkcdyw6xw34uhg", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bq/cbqt3vkbsukl3ofnzpcsscs2yqya6hwss2ivwbwkcdyw6xw34uhg.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096], [16, 4096, 2048], [32000, 2048], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2070552, "tid": 2107648, + "ts": 5327098035766.800, "dur": 22.385, + "args": { + "External id": 236210,"kernel_hash": "cshlfxe4rl24cbt566rdpjcqclo3uwm54uyk4lptbb3krvpyopow", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/sh/cshlfxe4rl24cbt566rdpjcqclo3uwm54uyk4lptbb3krvpyopow.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327098035825.890, "dur": 13.634, + "args": { + "External id": 236211,"Record function id": 0, "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5327098035829.139, "dur": 9.515, + "args": { + "External id": 236212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5327098035832.754, "dur": 5.110, + "args": { + "External id": 236213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5327098035833.915, "dur": 3.811, + "args": { + "External id": 236214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2070552, "tid": 2107648, + "ts": 5327098035861.737, "dur": 13839.682, + "args": { + "External id": 236215,"Record function id": 0, "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2070552, "tid": 2107648, + "ts": 5327098035881.914, "dur": 23.425, + "args": { + "External id": 236216,"Record function id": 0, "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2070552, "tid": 2107648, + "ts": 5327098035910.717, "dur": 63.230, + "args": { + "External id": 236217,"Record function id": 0, "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2070552, "tid": 2107648, + "ts": 5327098035995.296, "dur": 13350.976, + "args": { + "External id": 236218,"Record function id": 0, "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327098036071.622, "dur": 6.465, + "args": { + "External id": 236219,"Record function id": 0, "Concrete Inputs": ["[196610048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5327098036087.783, "dur": 5.845, + "args": { + "External id": 236220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327098036107.580, "dur": 12480.406, + "args": { + "External id": 236221,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5327098036120.094, "dur": 12455.989, + "args": { + "External id": 236222,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327098036279.075, "dur": 13.612, + "args": { + "External id": 236223,"Record function id": 0, "Concrete Inputs": ["[48027]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5327098036341.174, "dur": 12195.048, + "args": { + "External id": 236224,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], [], []], "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5327098036343.501, "dur": 12191.684, + "args": { + "External id": 236225,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], []], "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098036347.794, "dur": 8.540, + "args": { + "External id": 236226,"Record function id": 0, "Concrete Inputs": ["[48027]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5327098036358.224, "dur": 12172.356, + "args": { + "External id": 236227,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[48027], [48027], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5327098048712.531, "dur": 9.874, + "args": { + "External id": 236228,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[196610048], [], [], [], [], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5327098048715.350, "dur": 6.477, + "args": { + "External id": 236229,"Record function id": 0, "Concrete Inputs": ["[24576256]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5327098048749.879, "dur": 380.921, + "args": { + "External id": 236230,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[24576256], [196610048], [], [], [], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327098048773.806, "dur": 352.410, + "args": { + "External id": 236231,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 24576256, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[196610048], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4806, "In msg nelems": 196610048 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5327098048783.759, "dur": 336.584, + "args": { + "External id": 236232,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[196610048]], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5327098049149.962, "dur": 2.224, + "args": { + "External id": 236233,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4808, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098049209.636, "dur": 6.159, + "args": { + "External id": 236234,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098049256.397, "dur": 1.203, + "args": { + "External id": 236235,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098049272.843, "dur": 2.105, + "args": { + "External id": 236236,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "8192256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5327098049286.421, "dur": 1.379, + "args": { + "External id": 236237,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "16384256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#11775", "pid": 2070552, "tid": 2070552, + "ts": 5327096018835.976, "dur": 2047578.128, + "args": { + "External id": 227329,"Record function id": 0, "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2070552, "tid": 2070552, + "ts": 5327096018869.775, "dur": 640.871, + "args": { + "External id": 227330,"Record function id": 0, "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2070552, "tid": 2070552, + "ts": 5327096019552.991, "dur": 2021.912, + "args": { + "External id": 227331,"Record function id": 0, "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096020576.990, "dur": 7.383, + "args": { + "External id": 227332,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070552, "tid": 2070552, + "ts": 5327096020605.457, "dur": 6.376, + "args": { + "External id": 227333,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096021083.120, "dur": 2.529, + "args": { + "External id": 227334,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070552, "tid": 2070552, + "ts": 5327096021095.547, "dur": 2.475, + "args": { + "External id": 227335,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096021461.428, "dur": 1.600, + "args": { + "External id": 227336,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070552, "tid": 2070552, + "ts": 5327096021467.065, "dur": 1.802, + "args": { + "External id": 227337,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096022304.006, "dur": 15.829, + "args": { + "External id": 227338,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096022313.602, "dur": 2.272, + "args": { + "External id": 227339,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096022321.319, "dur": 4.085, + "args": { + "External id": 227340,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096022323.319, "dur": 1.004, + "args": { + "External id": 227341,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096022353.506, "dur": 532.007, + "args": { + "External id": 227342,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096022360.573, "dur": 523.439, + "args": { + "External id": 227343,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096022370.130, "dur": 9.748, + "args": { + "External id": 227344,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096022382.437, "dur": 499.834, + "args": { + "External id": 227345,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096022391.957, "dur": 0.459, + "args": { + "External id": 227346,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096022394.930, "dur": 6.094, + "args": { + "External id": 227347,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[16, 4096], [16, 4096]], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070552, "tid": 2070552, + "ts": 5327096022397.301, "dur": 3.561, + "args": { + "External id": 227348,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[16, 4096], [], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096022400.125, "dur": 0.500, + "args": { + "External id": 227349,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070552, "tid": 2070552, + "ts": 5327096022403.407, "dur": 163.173, + "args": { + "External id": 227350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070552, "tid": 2070552, + "ts": 5327096022406.149, "dur": 160.053, + "args": { + "External id": 227351,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096022408.570, "dur": 16.795, + "args": { + "External id": 227352,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096022413.222, "dur": 11.614, + "args": { + "External id": 227353,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096022426.237, "dur": 139.392, + "args": { + "External id": 227354,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096022568.282, "dur": 309.879, + "args": { + "External id": 227355,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096022902.973, "dur": 565.708, + "args": { + "External id": 227356,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096022904.994, "dur": 562.926, + "args": { + "External id": 227357,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096022911.596, "dur": 8.767, + "args": { + "External id": 227358,"Record function id": 0, "Concrete Inputs": ["[16, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096022923.690, "dur": 540.174, + "args": { + "External id": 227359,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[16, 8192], [16, 8192], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070552, "tid": 2070552, + "ts": 5327096023497.896, "dur": 52.715, + "args": { + "External id": 227360,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096023503.361, "dur": 5.356, + "args": { + "External id": 227361,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070552, "tid": 2070552, + "ts": 5327096023511.325, "dur": 38.813, + "args": { + "External id": 227362,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096023515.601, "dur": 7.351, + "args": { + "External id": 227363,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2070552, "tid": 2070552, + "ts": 5327096023561.449, "dur": 105.976, + "args": { + "External id": 227364,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070552, "tid": 2070552, + "ts": 5327096023568.531, "dur": 6.806, + "args": { + "External id": 227365,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096023573.242, "dur": 1.739, + "args": { + "External id": 227366,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 4850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096023576.356, "dur": 4.466, + "args": { + "External id": 227367,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2070552, "tid": 2070552, + "ts": 5327096023583.056, "dur": 2.859, + "args": { + "External id": 227368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[16, 4096]], "Ev Idx": 4852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070552, "tid": 2070552, + "ts": 5327096023589.022, "dur": 4.511, + "args": { + "External id": 227369,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096023592.711, "dur": 0.448, + "args": { + "External id": 227370,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070552, "tid": 2070552, + "ts": 5327096023594.201, "dur": 3.935, + "args": { + "External id": 227371,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096023597.336, "dur": 0.722, + "args": { + "External id": 227372,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096023599.659, "dur": 5.040, + "args": { + "External id": 227373,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [16, 1, 1, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070552, "tid": 2070552, + "ts": 5327096023602.347, "dur": 2.167, + "args": { + "External id": 227374,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096023603.526, "dur": 0.887, + "args": { + "External id": 227375,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096023605.638, "dur": 60.437, + "args": { + "External id": 227376,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[16, 1, 1, 4096], [16, 1, 1, 4096], []], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096023677.567, "dur": 28.724, + "args": { + "External id": 227377,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096023679.590, "dur": 26.527, + "args": { + "External id": 227378,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096023684.460, "dur": 3.668, + "args": { + "External id": 227379,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096023688.826, "dur": 16.876, + "args": { + "External id": 227380,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096023817.743, "dur": 150.003, + "args": { + "External id": 227381,"Record function id": 0, "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2070552, "tid": 2070552, + "ts": 5327096023903.614, "dur": 53.433, + "args": { + "External id": 227382,"Record function id": 0, "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096023974.784, "dur": 59.826, + "args": { + "External id": 227383,"Record function id": 0, "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096024044.894, "dur": 7884.518, + "args": { + "External id": 227384,"Record function id": 0, "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2070552, "tid": 2070552, + "ts": 5327096024054.840, "dur": 874.791, + "args": { + "External id": 227385,"Record function id": 0, "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096024125.744, "dur": 7.831, + "args": { + "External id": 227386,"Record function id": 0, "Concrete Inputs": ["[24576256]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096024149.393, "dur": 16.881, + "args": { + "External id": 227387,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024153.465, "dur": 1.507, + "args": { + "External id": 227388,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024160.340, "dur": 0.262, + "args": { + "External id": 227389,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024161.373, "dur": 0.219, + "args": { + "External id": 227390,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "8192256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024162.215, "dur": 0.424, + "args": { + "External id": 227391,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "16384256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096024179.268, "dur": 48.252, + "args": { + "External id": 227392,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096024257.169, "dur": 99.911, + "args": { + "External id": 227393,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "24576256", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [], [], [], [], [], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096024269.644, "dur": 3.955, + "args": { + "External id": 227394,"Record function id": 0, "Concrete Inputs": ["[196610048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096024279.874, "dur": 11.372, + "args": { + "External id": 227395,"Record function id": 0, "Concrete Inputs": ["", "0", "122881280", "24576256"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096024284.229, "dur": 6.609, + "args": { + "External id": 227396,"Record function id": 0, "Concrete Inputs": ["", "0", "122881280", "147457536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[196610048], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024289.119, "dur": 0.545, + "args": { + "External id": 227397,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "[1]", "122881280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096024297.355, "dur": 11.916, + "args": { + "External id": 227398,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024298.854, "dur": 0.431, + "args": { + "External id": 227399,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "122881280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024303.837, "dur": 0.207, + "args": { + "External id": 227400,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "131073280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024304.631, "dur": 0.419, + "args": { + "External id": 227401,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "131073536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096024305.646, "dur": 0.292, + "args": { + "External id": 227402,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "139265536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096024319.039, "dur": 30.230, + "args": { + "External id": 227403,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096024406.479, "dur": 422.104, + "args": { + "External id": 227404,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[196610048], [24576256], [], [], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096024437.804, "dur": 385.097, + "args": { + "External id": 227405,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 196610048, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[24576256], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4889, "In msg nelems": 24576256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096024448.865, "dur": 367.669, + "args": { + "External id": 227406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[24576256]], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096024857.155, "dur": 2.571, + "args": { + "External id": 227407,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4891, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2070552, "tid": 2070552, + "ts": 5327096024945.866, "dur": 6879.451, + "args": { + "External id": 227408,"Record function id": 0, "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096025053.594, "dur": 6.602, + "args": { + "External id": 227409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096025064.497, "dur": 0.932, + "args": { + "External id": 227410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096025067.158, "dur": 2.253, + "args": { + "External id": 227411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096025071.139, "dur": 0.804, + "args": { + "External id": 227412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096025073.460, "dur": 0.609, + "args": { + "External id": 227413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096025093.420, "dur": 6689.729, + "args": { + "External id": 227414,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096025110.218, "dur": 6665.583, + "args": { + "External id": 227415,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096025126.771, "dur": 4.909, + "args": { + "External id": 227416,"Record function id": 0, "Concrete Inputs": ["[3447]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096025139.224, "dur": 6601.638, + "args": { + "External id": 227417,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], [], []], "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096025141.739, "dur": 6598.407, + "args": { + "External id": 227418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], []], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096025146.966, "dur": 5.442, + "args": { + "External id": 227419,"Record function id": 0, "Concrete Inputs": ["[3447]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096025153.717, "dur": 6583.016, + "args": { + "External id": 227420,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3447], [3447], []], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096032016.366, "dur": 27.288, + "args": { + "External id": 227421,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2070552, "tid": 2070552, + "ts": 5327096032045.477, "dur": 206.092, + "args": { + "External id": 227422,"Record function id": 0, "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096032086.785, "dur": 156.037, + "args": { + "External id": 227423,"Sequence number": 959104, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[2048, 1], [4096, 1]], "Input Dims": [[32000, 2048], [16, 4096]], "Ev Idx": 4907 + } + }, + { + "ph": "s", "id": 72, "pid": 2070552, "tid": 2070552, "ts": 5327096032086.785, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096032157.673, "dur": 47.979, + "args": { + "External id": 227424,"kernel_hash": "csqogzggybapwolkqtroddnjtkp7ckvlrazmt5khcreabxycjcwc", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqogzggybapwolkqtroddnjtkp7ckvlrazmt5khcreabxycjcwc.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096], [32000, 2048], [16, 4096, 2048], []], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096032314.060, "dur": 51.056, + "args": { + "External id": 227425,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5327096032374.941, "dur": 7493.881, + "args": { + "External id": 227426,"Record function id": 0, "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5327096032382.524, "dur": 818.815, + "args": { + "External id": 227427,"Record function id": 0, "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096032456.472, "dur": 9.375, + "args": { + "External id": 227428,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096032480.200, "dur": 38.829, + "args": { + "External id": 227429,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032489.564, "dur": 2.282, + "args": { + "External id": 227430,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032493.408, "dur": 1.959, + "args": { + "External id": 227431,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032496.007, "dur": 2.480, + "args": { + "External id": 227432,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032500.869, "dur": 0.335, + "args": { + "External id": 227433,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032501.923, "dur": 0.374, + "args": { + "External id": 227434,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032506.612, "dur": 0.292, + "args": { + "External id": 227435,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032507.582, "dur": 0.294, + "args": { + "External id": 227436,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032508.703, "dur": 0.331, + "args": { + "External id": 227437,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032513.265, "dur": 0.158, + "args": { + "External id": 227438,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096032528.860, "dur": 34.369, + "args": { + "External id": 227439,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096032595.019, "dur": 169.581, + "args": { + "External id": 227440,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096032605.264, "dur": 4.431, + "args": { + "External id": 227441,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096032615.030, "dur": 65.723, + "args": { + "External id": 227442,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096032672.137, "dur": 8.130, + "args": { + "External id": 227443,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032677.508, "dur": 0.805, + "args": { + "External id": 227444,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096032688.955, "dur": 25.502, + "args": { + "External id": 227445,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032690.466, "dur": 0.322, + "args": { + "External id": 227446,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032693.368, "dur": 0.331, + "args": { + "External id": 227447,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032696.494, "dur": 0.329, + "args": { + "External id": 227448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032699.265, "dur": 0.372, + "args": { + "External id": 227449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032700.337, "dur": 2.173, + "args": { + "External id": 227450,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032703.132, "dur": 0.296, + "args": { + "External id": 227451,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032705.287, "dur": 1.561, + "args": { + "External id": 227452,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032709.030, "dur": 0.165, + "args": { + "External id": 227453,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096032709.748, "dur": 0.330, + "args": { + "External id": 227454,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096032727.301, "dur": 27.961, + "args": { + "External id": 227455,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096032817.475, "dur": 292.818, + "args": { + "External id": 227456,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096032845.737, "dur": 259.605, + "args": { + "External id": 227457,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4941, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096032855.319, "dur": 244.282, + "args": { + "External id": 227458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096033134.866, "dur": 2.494, + "args": { + "External id": 227459,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4943, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5327096033222.186, "dur": 6437.152, + "args": { + "External id": 227460,"Record function id": 0, "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033321.907, "dur": 6.032, + "args": { + "External id": 227461,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033331.225, "dur": 1.069, + "args": { + "External id": 227462,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033334.061, "dur": 1.015, + "args": { + "External id": 227463,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033336.741, "dur": 1.222, + "args": { + "External id": 227464,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033339.236, "dur": 1.028, + "args": { + "External id": 227465,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033341.443, "dur": 2.337, + "args": { + "External id": 227466,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033345.230, "dur": 1.196, + "args": { + "External id": 227467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033349.987, "dur": 1.695, + "args": { + "External id": 227468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033352.972, "dur": 0.685, + "args": { + "External id": 227469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096033354.993, "dur": 0.742, + "args": { + "External id": 227470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096033371.725, "dur": 6208.405, + "args": { + "External id": 227471,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096033387.705, "dur": 6184.768, + "args": { + "External id": 227472,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096033410.279, "dur": 12.723, + "args": { + "External id": 227473,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096033427.709, "dur": 6109.928, + "args": { + "External id": 227474,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096033430.170, "dur": 6106.585, + "args": { + "External id": 227475,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096033435.792, "dur": 6.191, + "args": { + "External id": 227476,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096033443.769, "dur": 6089.936, + "args": { + "External id": 227477,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096039810.636, "dur": 33.532, + "args": { + "External id": 227478,"Sequence number": 959105, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4962 + } + }, + { + "ph": "s", "id": 71, "pid": 2070552, "tid": 2070552, "ts": 5327096039810.636, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096039827.927, "dur": 11.330, + "args": { + "External id": 227479,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096039833.442, "dur": 5.518, + "args": { + "External id": 227480,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096039909.427, "dur": 128.444, + "args": { + "External id": 227481,"Record function id": 0, "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096040040.964, "dur": 1142.390, + "args": { + "External id": 227482,"Record function id": 0, "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096040083.831, "dur": 1085.737, + "args": { + "External id": 227483,"Sequence number": 959106, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 4967 + } + }, + { + "ph": "s", "id": 70, "pid": 2070552, "tid": 2070552, "ts": 5327096040083.831, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096040155.570, "dur": 49.761, + "args": { + "External id": 227484,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096040218.194, "dur": 106.665, + "args": { + "External id": 227485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096040334.176, "dur": 37.803, + "args": { + "External id": 227486,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096040381.195, "dur": 31.265, + "args": { + "External id": 227487,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096040437.258, "dur": 25.841, + "args": { + "External id": 227488,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096040481.678, "dur": 15.303, + "args": { + "External id": 227489,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096040514.498, "dur": 187.018, + "args": { + "External id": 227490,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096040568.084, "dur": 10.379, + "args": { + "External id": 227491,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096040572.871, "dur": 4.925, + "args": { + "External id": 227492,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096040581.012, "dur": 3.308, + "args": { + "External id": 227493,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096040586.422, "dur": 1.034, + "args": { + "External id": 227494,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096040589.627, "dur": 3.187, + "args": { + "External id": 227495,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096040714.370, "dur": 53.224, + "args": { + "External id": 227496,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096040800.077, "dur": 29.767, + "args": { + "External id": 227497,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096040838.537, "dur": 41.931, + "args": { + "External id": 227498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096040888.538, "dur": 34.660, + "args": { + "External id": 227499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096040945.720, "dur": 27.603, + "args": { + "External id": 227500,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096041004.250, "dur": 38.459, + "args": { + "External id": 227501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096041070.730, "dur": 20.852, + "args": { + "External id": 227502,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5327096041246.663, "dur": 81.699, + "args": { + "External id": 227503,"Record function id": 0, "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096041398.255, "dur": 41.835, + "args": { + "External id": 227504,"Record function id": 0, "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5327096041448.964, "dur": 19326.164, + "args": { + "External id": 227505,"Record function id": 0, "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5327096041458.676, "dur": 1038.665, + "args": { + "External id": 227506,"Record function id": 0, "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096041540.853, "dur": 8.070, + "args": { + "External id": 227507,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096041562.013, "dur": 38.515, + "args": { + "External id": 227508,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041568.664, "dur": 4.693, + "args": { + "External id": 227509,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041576.893, "dur": 0.470, + "args": { + "External id": 227510,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041579.675, "dur": 0.256, + "args": { + "External id": 227511,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041582.729, "dur": 0.293, + "args": { + "External id": 227512,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041583.693, "dur": 0.248, + "args": { + "External id": 227513,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041585.903, "dur": 1.479, + "args": { + "External id": 227514,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041588.064, "dur": 1.451, + "args": { + "External id": 227515,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041590.289, "dur": 0.167, + "args": { + "External id": 227516,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041594.736, "dur": 0.194, + "args": { + "External id": 227517,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096041610.981, "dur": 121.209, + "args": { + "External id": 227518,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096041771.792, "dur": 120.042, + "args": { + "External id": 227519,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096041785.611, "dur": 5.556, + "args": { + "External id": 227520,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096041796.930, "dur": 11.253, + "args": { + "External id": 227521,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096041801.483, "dur": 6.261, + "args": { + "External id": 227522,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041805.153, "dur": 0.766, + "args": { + "External id": 227523,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096041815.469, "dur": 26.605, + "args": { + "External id": 227524,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041818.489, "dur": 0.404, + "args": { + "External id": 227525,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041819.936, "dur": 0.417, + "args": { + "External id": 227526,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041822.761, "dur": 0.218, + "args": { + "External id": 227527,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041825.075, "dur": 1.392, + "args": { + "External id": 227528,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041827.029, "dur": 0.309, + "args": { + "External id": 227529,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041829.883, "dur": 0.390, + "args": { + "External id": 227530,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041832.039, "dur": 0.156, + "args": { + "External id": 227531,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041832.942, "dur": 2.370, + "args": { + "External id": 227532,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096041837.523, "dur": 0.152, + "args": { + "External id": 227533,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096041855.864, "dur": 26.589, + "args": { + "External id": 227534,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096041949.680, "dur": 446.013, + "args": { + "External id": 227535,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096042028.647, "dur": 362.150, + "args": { + "External id": 227536,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5020, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096042039.537, "dur": 345.478, + "args": { + "External id": 227537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096042421.737, "dur": 2.961, + "args": { + "External id": 227538,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5022, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5327096042518.128, "dur": 18020.539, + "args": { + "External id": 227539,"Record function id": 0, "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042685.912, "dur": 7.132, + "args": { + "External id": 227540,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042697.287, "dur": 0.796, + "args": { + "External id": 227541,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042699.680, "dur": 2.010, + "args": { + "External id": 227542,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042703.610, "dur": 0.870, + "args": { + "External id": 227543,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042705.521, "dur": 0.792, + "args": { + "External id": 227544,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042709.638, "dur": 0.917, + "args": { + "External id": 227545,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042711.770, "dur": 0.966, + "args": { + "External id": 227546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042714.328, "dur": 2.038, + "args": { + "External id": 227547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042717.464, "dur": 0.774, + "args": { + "External id": 227548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096042721.609, "dur": 0.702, + "args": { + "External id": 227549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096042742.048, "dur": 17752.347, + "args": { + "External id": 227550,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096042757.467, "dur": 17728.890, + "args": { + "External id": 227551,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096042779.987, "dur": 15.573, + "args": { + "External id": 227552,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096042799.798, "dur": 17643.235, + "args": { + "External id": 227553,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096042802.357, "dur": 17639.938, + "args": { + "External id": 227554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096042807.477, "dur": 5.586, + "args": { + "External id": 227555,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096042814.650, "dur": 17624.743, + "args": { + "External id": 227556,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096060717.686, "dur": 32.281, + "args": { + "External id": 227557,"Sequence number": 959107, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5041 + } + }, + { + "ph": "s", "id": 69, "pid": 2070552, "tid": 2070552, "ts": 5327096060717.686, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096060736.339, "dur": 8.874, + "args": { + "External id": 227558,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096060739.630, "dur": 5.126, + "args": { + "External id": 227559,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096060815.085, "dur": 85.513, + "args": { + "External id": 227560,"Record function id": 0, "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096060902.129, "dur": 1091.184, + "args": { + "External id": 227561,"Record function id": 0, "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096060941.378, "dur": 1022.267, + "args": { + "External id": 227562,"Sequence number": 959108, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5046 + } + }, + { + "ph": "s", "id": 68, "pid": 2070552, "tid": 2070552, "ts": 5327096060941.378, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096061025.073, "dur": 46.439, + "args": { + "External id": 227563,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096061085.592, "dur": 108.545, + "args": { + "External id": 227564,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096061204.291, "dur": 37.335, + "args": { + "External id": 227565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096061250.799, "dur": 32.054, + "args": { + "External id": 227566,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096061307.657, "dur": 25.213, + "args": { + "External id": 227567,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096061350.009, "dur": 14.089, + "args": { + "External id": 227568,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096061383.465, "dur": 126.556, + "args": { + "External id": 227569,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096061433.640, "dur": 13.002, + "args": { + "External id": 227570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096061439.498, "dur": 6.332, + "args": { + "External id": 227571,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096061448.961, "dur": 4.426, + "args": { + "External id": 227572,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096061454.540, "dur": 0.859, + "args": { + "External id": 227573,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096061457.674, "dur": 2.720, + "args": { + "External id": 227574,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096061519.973, "dur": 43.521, + "args": { + "External id": 227575,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096061593.944, "dur": 67.083, + "args": { + "External id": 227576,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096061672.325, "dur": 46.674, + "args": { + "External id": 227577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096061727.474, "dur": 34.973, + "args": { + "External id": 227578,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096061786.714, "dur": 25.314, + "args": { + "External id": 227579,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096061817.950, "dur": 33.184, + "args": { + "External id": 227580,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096061873.477, "dur": 17.524, + "args": { + "External id": 227581,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5327096062060.307, "dur": 80.760, + "args": { + "External id": 227582,"Record function id": 0, "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096062212.135, "dur": 45.531, + "args": { + "External id": 227583,"Record function id": 0, "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5327096062267.314, "dur": 17836.094, + "args": { + "External id": 227584,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5327096062276.787, "dur": 856.905, + "args": { + "External id": 227585,"Record function id": 0, "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096062360.472, "dur": 9.046, + "args": { + "External id": 227586,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096062383.671, "dur": 34.168, + "args": { + "External id": 227587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062388.946, "dur": 2.097, + "args": { + "External id": 227588,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062396.862, "dur": 0.244, + "args": { + "External id": 227589,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062397.911, "dur": 0.220, + "args": { + "External id": 227590,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062398.951, "dur": 0.333, + "args": { + "External id": 227591,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062402.452, "dur": 1.496, + "args": { + "External id": 227592,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062404.791, "dur": 0.405, + "args": { + "External id": 227593,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062406.899, "dur": 2.498, + "args": { + "External id": 227594,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062410.425, "dur": 0.384, + "args": { + "External id": 227595,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062411.418, "dur": 0.431, + "args": { + "External id": 227596,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096062430.562, "dur": 45.698, + "args": { + "External id": 227597,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096062507.850, "dur": 106.884, + "args": { + "External id": 227598,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096062520.196, "dur": 3.599, + "args": { + "External id": 227599,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096062529.103, "dur": 10.095, + "args": { + "External id": 227600,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096062533.781, "dur": 4.976, + "args": { + "External id": 227601,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062536.947, "dur": 0.476, + "args": { + "External id": 227602,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096062546.269, "dur": 26.793, + "args": { + "External id": 227603,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062547.794, "dur": 2.349, + "args": { + "External id": 227604,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062550.781, "dur": 1.441, + "args": { + "External id": 227605,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062554.606, "dur": 0.525, + "args": { + "External id": 227606,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062557.813, "dur": 0.223, + "args": { + "External id": 227607,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062560.042, "dur": 0.161, + "args": { + "External id": 227608,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062560.756, "dur": 0.378, + "args": { + "External id": 227609,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062563.556, "dur": 0.378, + "args": { + "External id": 227610,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062566.067, "dur": 0.385, + "args": { + "External id": 227611,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096062566.936, "dur": 2.202, + "args": { + "External id": 227612,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096062582.829, "dur": 24.073, + "args": { + "External id": 227613,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096062720.323, "dur": 318.738, + "args": { + "External id": 227614,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096062751.959, "dur": 282.354, + "args": { + "External id": 227615,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5099, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096062765.769, "dur": 262.924, + "args": { + "External id": 227616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096063063.765, "dur": 2.439, + "args": { + "External id": 227617,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5101, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5327096063153.108, "dur": 16721.936, + "args": { + "External id": 227618,"Record function id": 0, "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063254.117, "dur": 6.341, + "args": { + "External id": 227619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063263.853, "dur": 0.926, + "args": { + "External id": 227620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063266.628, "dur": 0.660, + "args": { + "External id": 227621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063268.966, "dur": 0.793, + "args": { + "External id": 227622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063271.090, "dur": 1.776, + "args": { + "External id": 227623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063274.103, "dur": 0.936, + "args": { + "External id": 227624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063278.398, "dur": 0.860, + "args": { + "External id": 227625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063280.653, "dur": 3.063, + "args": { + "External id": 227626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063285.077, "dur": 0.861, + "args": { + "External id": 227627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096063287.198, "dur": 0.823, + "args": { + "External id": 227628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096063308.048, "dur": 16523.425, + "args": { + "External id": 227629,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096063322.575, "dur": 16500.972, + "args": { + "External id": 227630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096063344.496, "dur": 13.989, + "args": { + "External id": 227631,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096063362.180, "dur": 16428.153, + "args": { + "External id": 227632,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096063364.802, "dur": 16424.768, + "args": { + "External id": 227633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096063371.137, "dur": 4.648, + "args": { + "External id": 227634,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096063377.433, "dur": 16409.300, + "args": { + "External id": 227635,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096080047.727, "dur": 31.039, + "args": { + "External id": 227636,"Sequence number": 959109, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5120 + } + }, + { + "ph": "s", "id": 67, "pid": 2070552, "tid": 2070552, "ts": 5327096080047.727, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096080064.740, "dur": 9.248, + "args": { + "External id": 227637,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096080068.348, "dur": 5.273, + "args": { + "External id": 227638,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096080142.531, "dur": 85.438, + "args": { + "External id": 227639,"Record function id": 0, "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096080229.470, "dur": 1150.116, + "args": { + "External id": 227640,"Record function id": 0, "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096080270.685, "dur": 1094.616, + "args": { + "External id": 227641,"Sequence number": 959110, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5125 + } + }, + { + "ph": "s", "id": 66, "pid": 2070552, "tid": 2070552, "ts": 5327096080270.685, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096080338.750, "dur": 44.658, + "args": { + "External id": 227642,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096080395.844, "dur": 103.762, + "args": { + "External id": 227643,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096080508.006, "dur": 38.801, + "args": { + "External id": 227644,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096080555.672, "dur": 30.520, + "args": { + "External id": 227645,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096080612.241, "dur": 94.123, + "args": { + "External id": 227646,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096080728.315, "dur": 16.256, + "args": { + "External id": 227647,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096080763.157, "dur": 127.887, + "args": { + "External id": 227648,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096080813.372, "dur": 13.535, + "args": { + "External id": 227649,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096080819.130, "dur": 6.877, + "args": { + "External id": 227650,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096080829.221, "dur": 5.429, + "args": { + "External id": 227651,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096080835.727, "dur": 1.189, + "args": { + "External id": 227652,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096080839.200, "dur": 2.346, + "args": { + "External id": 227653,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096080902.573, "dur": 53.656, + "args": { + "External id": 227654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096081026.835, "dur": 34.013, + "args": { + "External id": 227655,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096081070.997, "dur": 44.995, + "args": { + "External id": 227656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096081123.613, "dur": 34.238, + "args": { + "External id": 227657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096081180.558, "dur": 25.451, + "args": { + "External id": 227658,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096081211.958, "dur": 37.064, + "args": { + "External id": 227659,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096081269.441, "dur": 20.459, + "args": { + "External id": 227660,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5327096081443.552, "dur": 82.738, + "args": { + "External id": 227661,"Record function id": 0, "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096081600.261, "dur": 105.583, + "args": { + "External id": 227662,"Record function id": 0, "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5327096081717.192, "dur": 18391.155, + "args": { + "External id": 227663,"Record function id": 0, "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5327096081728.841, "dur": 819.152, + "args": { + "External id": 227664,"Record function id": 0, "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096081811.514, "dur": 9.928, + "args": { + "External id": 227665,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096081835.877, "dur": 36.795, + "args": { + "External id": 227666,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081841.045, "dur": 2.008, + "args": { + "External id": 227667,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081848.195, "dur": 0.565, + "args": { + "External id": 227668,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081849.732, "dur": 0.245, + "args": { + "External id": 227669,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081851.840, "dur": 0.275, + "args": { + "External id": 227670,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081855.419, "dur": 0.491, + "args": { + "External id": 227671,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081856.648, "dur": 0.285, + "args": { + "External id": 227672,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081858.037, "dur": 2.898, + "args": { + "External id": 227673,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081862.510, "dur": 0.332, + "args": { + "External id": 227674,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096081866.508, "dur": 0.217, + "args": { + "External id": 227675,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096081883.370, "dur": 42.528, + "args": { + "External id": 227676,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096081959.817, "dur": 153.865, + "args": { + "External id": 227677,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096081972.346, "dur": 33.177, + "args": { + "External id": 227678,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096082012.674, "dur": 14.222, + "args": { + "External id": 227679,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096082017.096, "dur": 9.340, + "args": { + "External id": 227680,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082021.474, "dur": 3.100, + "args": { + "External id": 227681,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096082036.894, "dur": 28.415, + "args": { + "External id": 227682,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082039.167, "dur": 0.274, + "args": { + "External id": 227683,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082041.322, "dur": 2.456, + "args": { + "External id": 227684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082045.272, "dur": 0.196, + "args": { + "External id": 227685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082046.771, "dur": 1.498, + "args": { + "External id": 227686,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082052.225, "dur": 0.179, + "args": { + "External id": 227687,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082053.919, "dur": 0.559, + "args": { + "External id": 227688,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082055.998, "dur": 0.540, + "args": { + "External id": 227689,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082059.742, "dur": 0.396, + "args": { + "External id": 227690,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082061.195, "dur": 0.385, + "args": { + "External id": 227691,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096082079.003, "dur": 26.637, + "args": { + "External id": 227692,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096082169.337, "dur": 293.460, + "args": { + "External id": 227693,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096082201.205, "dur": 257.371, + "args": { + "External id": 227694,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5178, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096082212.186, "dur": 240.234, + "args": { + "External id": 227695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096082484.839, "dur": 2.430, + "args": { + "External id": 227696,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5180, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5327096082567.562, "dur": 17322.104, + "args": { + "External id": 227697,"Record function id": 0, "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082713.526, "dur": 6.485, + "args": { + "External id": 227698,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082724.597, "dur": 1.110, + "args": { + "External id": 227699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082727.960, "dur": 1.936, + "args": { + "External id": 227700,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082732.174, "dur": 0.659, + "args": { + "External id": 227701,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082734.388, "dur": 0.786, + "args": { + "External id": 227702,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082736.980, "dur": 0.811, + "args": { + "External id": 227703,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082741.430, "dur": 0.820, + "args": { + "External id": 227704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082743.760, "dur": 1.746, + "args": { + "External id": 227705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082747.028, "dur": 0.970, + "args": { + "External id": 227706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096082749.680, "dur": 0.664, + "args": { + "External id": 227707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096082770.752, "dur": 17070.425, + "args": { + "External id": 227708,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096082786.040, "dur": 17047.739, + "args": { + "External id": 227709,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096082805.650, "dur": 15.394, + "args": { + "External id": 227710,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096082824.493, "dur": 16976.450, + "args": { + "External id": 227711,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096082826.976, "dur": 16973.122, + "args": { + "External id": 227712,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096082833.595, "dur": 6.547, + "args": { + "External id": 227713,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096082841.983, "dur": 16955.318, + "args": { + "External id": 227714,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096100049.396, "dur": 32.763, + "args": { + "External id": 227715,"Sequence number": 959111, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5199 + } + }, + { + "ph": "s", "id": 65, "pid": 2070552, "tid": 2070552, "ts": 5327096100049.396, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096100068.474, "dur": 8.841, + "args": { + "External id": 227716,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096100072.010, "dur": 4.920, + "args": { + "External id": 227717,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096100148.422, "dur": 83.161, + "args": { + "External id": 227718,"Record function id": 0, "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096100233.699, "dur": 1083.596, + "args": { + "External id": 227719,"Record function id": 0, "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096100274.830, "dur": 1028.845, + "args": { + "External id": 227720,"Sequence number": 959112, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5204 + } + }, + { + "ph": "s", "id": 64, "pid": 2070552, "tid": 2070552, "ts": 5327096100274.830, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096100341.226, "dur": 44.287, + "args": { + "External id": 227721,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096100398.812, "dur": 104.806, + "args": { + "External id": 227722,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096100512.343, "dur": 37.963, + "args": { + "External id": 227723,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096100558.294, "dur": 30.482, + "args": { + "External id": 227724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096100613.596, "dur": 68.902, + "args": { + "External id": 227725,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096100703.292, "dur": 15.620, + "args": { + "External id": 227726,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096100738.725, "dur": 127.338, + "args": { + "External id": 227727,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096100787.899, "dur": 11.886, + "args": { + "External id": 227728,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096100792.921, "dur": 6.104, + "args": { + "External id": 227729,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096100802.349, "dur": 4.928, + "args": { + "External id": 227730,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096100808.545, "dur": 1.044, + "args": { + "External id": 227731,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096100812.204, "dur": 5.753, + "args": { + "External id": 227732,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096100876.669, "dur": 49.855, + "args": { + "External id": 227733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096100956.500, "dur": 45.804, + "args": { + "External id": 227734,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096101015.646, "dur": 44.511, + "args": { + "External id": 227735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096101069.393, "dur": 34.406, + "args": { + "External id": 227736,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096101127.279, "dur": 26.410, + "args": { + "External id": 227737,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096101159.396, "dur": 34.536, + "args": { + "External id": 227738,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096101213.861, "dur": 18.160, + "args": { + "External id": 227739,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5327096101382.311, "dur": 75.380, + "args": { + "External id": 227740,"Record function id": 0, "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096101530.071, "dur": 50.431, + "args": { + "External id": 227741,"Record function id": 0, "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5327096101597.271, "dur": 17764.473, + "args": { + "External id": 227742,"Record function id": 0, "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5327096101606.433, "dur": 859.216, + "args": { + "External id": 227743,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096101738.770, "dur": 8.583, + "args": { + "External id": 227744,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096101762.718, "dur": 37.746, + "args": { + "External id": 227745,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101768.227, "dur": 2.262, + "args": { + "External id": 227746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101774.994, "dur": 0.328, + "args": { + "External id": 227747,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101777.329, "dur": 0.625, + "args": { + "External id": 227748,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101779.397, "dur": 0.259, + "args": { + "External id": 227749,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101783.416, "dur": 0.413, + "args": { + "External id": 227750,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101784.927, "dur": 0.588, + "args": { + "External id": 227751,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101787.123, "dur": 4.358, + "args": { + "External id": 227752,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101793.013, "dur": 0.254, + "args": { + "External id": 227753,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101794.437, "dur": 0.449, + "args": { + "External id": 227754,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096101811.746, "dur": 46.174, + "args": { + "External id": 227755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096101892.091, "dur": 141.767, + "args": { + "External id": 227756,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096101904.602, "dur": 3.658, + "args": { + "External id": 227757,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096101913.475, "dur": 10.959, + "args": { + "External id": 227758,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096101918.131, "dur": 5.846, + "args": { + "External id": 227759,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101921.832, "dur": 0.820, + "args": { + "External id": 227760,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096101931.029, "dur": 39.059, + "args": { + "External id": 227761,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101933.088, "dur": 2.720, + "args": { + "External id": 227762,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101937.061, "dur": 0.589, + "args": { + "External id": 227763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101938.866, "dur": 0.617, + "args": { + "External id": 227764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101942.894, "dur": 2.056, + "args": { + "External id": 227765,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101946.268, "dur": 0.304, + "args": { + "External id": 227766,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101956.181, "dur": 0.458, + "args": { + "External id": 227767,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101959.863, "dur": 0.689, + "args": { + "External id": 227768,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101961.780, "dur": 0.800, + "args": { + "External id": 227769,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096101963.656, "dur": 2.433, + "args": { + "External id": 227770,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096101996.688, "dur": 27.372, + "args": { + "External id": 227771,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096102089.753, "dur": 290.042, + "args": { + "External id": 227772,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096102122.124, "dur": 252.837, + "args": { + "External id": 227773,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5257, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096102133.409, "dur": 235.988, + "args": { + "External id": 227774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096102402.257, "dur": 2.140, + "args": { + "External id": 227775,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5259, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5327096102485.312, "dur": 16672.191, + "args": { + "External id": 227776,"Record function id": 0, "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102580.732, "dur": 5.726, + "args": { + "External id": 227777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102590.801, "dur": 1.322, + "args": { + "External id": 227778,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102593.926, "dur": 3.179, + "args": { + "External id": 227779,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102599.142, "dur": 0.973, + "args": { + "External id": 227780,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102601.723, "dur": 0.784, + "args": { + "External id": 227781,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102604.014, "dur": 0.895, + "args": { + "External id": 227782,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102608.221, "dur": 1.004, + "args": { + "External id": 227783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102610.751, "dur": 2.040, + "args": { + "External id": 227784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102614.497, "dur": 0.953, + "args": { + "External id": 227785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096102616.862, "dur": 0.949, + "args": { + "External id": 227786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096102681.246, "dur": 16424.174, + "args": { + "External id": 227787,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096102698.133, "dur": 16399.543, + "args": { + "External id": 227788,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096102720.640, "dur": 15.401, + "args": { + "External id": 227789,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096102739.913, "dur": 16322.522, + "args": { + "External id": 227790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096102742.689, "dur": 16318.996, + "args": { + "External id": 227791,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096102748.706, "dur": 7.265, + "args": { + "External id": 227792,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096102757.588, "dur": 16301.208, + "args": { + "External id": 227793,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096119305.262, "dur": 31.742, + "args": { + "External id": 227794,"Sequence number": 959113, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5278 + } + }, + { + "ph": "s", "id": 63, "pid": 2070552, "tid": 2070552, "ts": 5327096119305.262, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096119322.727, "dur": 9.382, + "args": { + "External id": 227795,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096119326.535, "dur": 5.302, + "args": { + "External id": 227796,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096119402.508, "dur": 82.334, + "args": { + "External id": 227797,"Record function id": 0, "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096119486.565, "dur": 1076.597, + "args": { + "External id": 227798,"Record function id": 0, "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096119526.841, "dur": 1023.861, + "args": { + "External id": 227799,"Sequence number": 959114, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5283 + } + }, + { + "ph": "s", "id": 62, "pid": 2070552, "tid": 2070552, "ts": 5327096119526.841, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096119593.432, "dur": 81.232, + "args": { + "External id": 227800,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096119692.082, "dur": 110.392, + "args": { + "External id": 227801,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096119812.266, "dur": 37.674, + "args": { + "External id": 227802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096119858.203, "dur": 30.081, + "args": { + "External id": 227803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096119916.297, "dur": 26.735, + "args": { + "External id": 227804,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096119959.646, "dur": 13.753, + "args": { + "External id": 227805,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096120008.584, "dur": 131.423, + "args": { + "External id": 227806,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096120058.767, "dur": 11.538, + "args": { + "External id": 227807,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096120063.664, "dur": 5.866, + "args": { + "External id": 227808,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096120073.186, "dur": 5.832, + "args": { + "External id": 227809,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096120080.371, "dur": 1.346, + "args": { + "External id": 227810,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096120084.151, "dur": 5.250, + "args": { + "External id": 227811,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096120151.300, "dur": 48.146, + "args": { + "External id": 227812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096120231.224, "dur": 27.476, + "args": { + "External id": 227813,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096120268.680, "dur": 40.665, + "args": { + "External id": 227814,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096120318.636, "dur": 34.835, + "args": { + "External id": 227815,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096120374.149, "dur": 23.187, + "args": { + "External id": 227816,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096120403.369, "dur": 34.406, + "args": { + "External id": 227817,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096120458.363, "dur": 21.301, + "args": { + "External id": 227818,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5327096120665.669, "dur": 83.554, + "args": { + "External id": 227819,"Record function id": 0, "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096120824.402, "dur": 47.806, + "args": { + "External id": 227820,"Record function id": 0, "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5327096120881.726, "dur": 18285.586, + "args": { + "External id": 227821,"Record function id": 0, "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5327096120889.671, "dur": 882.511, + "args": { + "External id": 227822,"Record function id": 0, "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096120970.713, "dur": 24.991, + "args": { + "External id": 227823,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096121012.159, "dur": 38.168, + "args": { + "External id": 227824,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121017.812, "dur": 2.210, + "args": { + "External id": 227825,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121024.388, "dur": 0.401, + "args": { + "External id": 227826,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121026.003, "dur": 0.377, + "args": { + "External id": 227827,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121027.920, "dur": 0.628, + "args": { + "External id": 227828,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121031.754, "dur": 0.536, + "args": { + "External id": 227829,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121033.809, "dur": 0.507, + "args": { + "External id": 227830,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121035.640, "dur": 4.711, + "args": { + "External id": 227831,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121041.628, "dur": 0.506, + "args": { + "External id": 227832,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121043.616, "dur": 0.617, + "args": { + "External id": 227833,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096121063.754, "dur": 45.944, + "args": { + "External id": 227834,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096121146.108, "dur": 140.140, + "args": { + "External id": 227835,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096121158.207, "dur": 5.530, + "args": { + "External id": 227836,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096121168.945, "dur": 10.152, + "args": { + "External id": 227837,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096121173.147, "dur": 5.488, + "args": { + "External id": 227838,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121176.534, "dur": 0.648, + "args": { + "External id": 227839,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096121185.954, "dur": 49.535, + "args": { + "External id": 227840,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121212.011, "dur": 0.483, + "args": { + "External id": 227841,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121216.298, "dur": 0.397, + "args": { + "External id": 227842,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121217.961, "dur": 0.302, + "args": { + "External id": 227843,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121219.329, "dur": 1.996, + "args": { + "External id": 227844,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121222.641, "dur": 0.410, + "args": { + "External id": 227845,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121224.423, "dur": 0.364, + "args": { + "External id": 227846,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121228.293, "dur": 0.215, + "args": { + "External id": 227847,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121229.274, "dur": 0.360, + "args": { + "External id": 227848,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096121230.957, "dur": 0.154, + "args": { + "External id": 227849,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096121252.432, "dur": 25.819, + "args": { + "External id": 227850,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096121341.319, "dur": 332.902, + "args": { + "External id": 227851,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096121372.815, "dur": 296.113, + "args": { + "External id": 227852,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5336, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096121384.919, "dur": 276.785, + "args": { + "External id": 227853,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096121699.368, "dur": 2.494, + "args": { + "External id": 227854,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5338, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5327096121793.062, "dur": 17152.376, + "args": { + "External id": 227855,"Record function id": 0, "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121894.108, "dur": 6.370, + "args": { + "External id": 227856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121904.611, "dur": 1.515, + "args": { + "External id": 227857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121908.102, "dur": 2.923, + "args": { + "External id": 227858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121912.969, "dur": 0.700, + "args": { + "External id": 227859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121915.388, "dur": 0.925, + "args": { + "External id": 227860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121917.924, "dur": 0.876, + "args": { + "External id": 227861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121922.415, "dur": 0.814, + "args": { + "External id": 227862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121924.527, "dur": 2.042, + "args": { + "External id": 227863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121927.947, "dur": 1.057, + "args": { + "External id": 227864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096121930.548, "dur": 0.880, + "args": { + "External id": 227865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096121950.680, "dur": 16940.441, + "args": { + "External id": 227866,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096121965.288, "dur": 16917.487, + "args": { + "External id": 227867,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096122001.203, "dur": 16.253, + "args": { + "External id": 227868,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096122021.236, "dur": 16826.787, + "args": { + "External id": 227869,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096122023.808, "dur": 16823.121, + "args": { + "External id": 227870,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096122029.808, "dur": 7.129, + "args": { + "External id": 227871,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096122038.414, "dur": 16805.370, + "args": { + "External id": 227872,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096139109.539, "dur": 31.058, + "args": { + "External id": 227873,"Sequence number": 959115, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5357 + } + }, + { + "ph": "s", "id": 61, "pid": 2070552, "tid": 2070552, "ts": 5327096139109.539, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096139125.708, "dur": 9.554, + "args": { + "External id": 227874,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096139129.417, "dur": 5.487, + "args": { + "External id": 227875,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096139206.642, "dur": 80.695, + "args": { + "External id": 227876,"Record function id": 0, "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096139289.147, "dur": 1085.914, + "args": { + "External id": 227877,"Record function id": 0, "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096139329.314, "dur": 1032.567, + "args": { + "External id": 227878,"Sequence number": 959116, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5362 + } + }, + { + "ph": "s", "id": 60, "pid": 2070552, "tid": 2070552, "ts": 5327096139329.314, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096139395.655, "dur": 43.484, + "args": { + "External id": 227879,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096139451.931, "dur": 107.941, + "args": { + "External id": 227880,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096139568.814, "dur": 37.111, + "args": { + "External id": 227881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096139614.103, "dur": 76.927, + "args": { + "External id": 227882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096139722.626, "dur": 27.200, + "args": { + "External id": 227883,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096139766.754, "dur": 13.553, + "args": { + "External id": 227884,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096139800.426, "dur": 130.514, + "args": { + "External id": 227885,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096139851.152, "dur": 11.117, + "args": { + "External id": 227886,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096139856.156, "dur": 5.207, + "args": { + "External id": 227887,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096139864.712, "dur": 5.424, + "args": { + "External id": 227888,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096139871.800, "dur": 1.186, + "args": { + "External id": 227889,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096139875.106, "dur": 5.654, + "args": { + "External id": 227890,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096139941.802, "dur": 62.694, + "args": { + "External id": 227891,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096140038.990, "dur": 28.218, + "args": { + "External id": 227892,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096140077.887, "dur": 43.484, + "args": { + "External id": 227893,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096140129.246, "dur": 34.749, + "args": { + "External id": 227894,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096140186.613, "dur": 25.043, + "args": { + "External id": 227895,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096140217.810, "dur": 34.079, + "args": { + "External id": 227896,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096140272.082, "dur": 17.578, + "args": { + "External id": 227897,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5327096140440.263, "dur": 79.745, + "args": { + "External id": 227898,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096140592.183, "dur": 91.460, + "args": { + "External id": 227899,"Record function id": 0, "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5327096140694.747, "dur": 18045.786, + "args": { + "External id": 227900,"Record function id": 0, "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5327096140703.937, "dur": 790.308, + "args": { + "External id": 227901,"Record function id": 0, "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096140788.707, "dur": 9.200, + "args": { + "External id": 227902,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096140811.973, "dur": 36.415, + "args": { + "External id": 227903,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140817.170, "dur": 2.634, + "args": { + "External id": 227904,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140824.290, "dur": 0.736, + "args": { + "External id": 227905,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140826.136, "dur": 0.634, + "args": { + "External id": 227906,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140828.122, "dur": 0.459, + "args": { + "External id": 227907,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140831.685, "dur": 0.550, + "args": { + "External id": 227908,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140833.408, "dur": 0.535, + "args": { + "External id": 227909,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140834.889, "dur": 3.761, + "args": { + "External id": 227910,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140840.099, "dur": 0.531, + "args": { + "External id": 227911,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140841.850, "dur": 0.407, + "args": { + "External id": 227912,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096140859.228, "dur": 45.937, + "args": { + "External id": 227913,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096140937.902, "dur": 130.058, + "args": { + "External id": 227914,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096140949.212, "dur": 3.956, + "args": { + "External id": 227915,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096140958.592, "dur": 10.156, + "args": { + "External id": 227916,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096140963.422, "dur": 4.878, + "args": { + "External id": 227917,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140966.506, "dur": 0.582, + "args": { + "External id": 227918,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096140975.365, "dur": 45.264, + "args": { + "External id": 227919,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140993.251, "dur": 2.052, + "args": { + "External id": 227920,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140996.809, "dur": 0.293, + "args": { + "External id": 227921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096140998.330, "dur": 0.643, + "args": { + "External id": 227922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096141002.117, "dur": 2.463, + "args": { + "External id": 227923,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096141005.678, "dur": 0.829, + "args": { + "External id": 227924,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096141007.572, "dur": 0.420, + "args": { + "External id": 227925,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096141010.584, "dur": 0.490, + "args": { + "External id": 227926,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096141012.636, "dur": 0.473, + "args": { + "External id": 227927,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096141014.131, "dur": 2.269, + "args": { + "External id": 227928,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096141032.284, "dur": 27.220, + "args": { + "External id": 227929,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096141122.298, "dur": 289.754, + "args": { + "External id": 227930,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096141155.117, "dur": 252.445, + "args": { + "External id": 227931,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5415, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096141164.880, "dur": 237.038, + "args": { + "External id": 227932,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096141431.849, "dur": 2.105, + "args": { + "External id": 227933,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5417, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5327096141514.388, "dur": 16968.145, + "args": { + "External id": 227934,"Record function id": 0, "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141609.743, "dur": 5.226, + "args": { + "External id": 227935,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141659.142, "dur": 2.631, + "args": { + "External id": 227936,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141665.826, "dur": 3.168, + "args": { + "External id": 227937,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141670.986, "dur": 1.126, + "args": { + "External id": 227938,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141673.785, "dur": 1.034, + "args": { + "External id": 227939,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141676.278, "dur": 1.115, + "args": { + "External id": 227940,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141681.163, "dur": 0.846, + "args": { + "External id": 227941,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141683.521, "dur": 2.082, + "args": { + "External id": 227942,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141686.986, "dur": 0.732, + "args": { + "External id": 227943,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096141689.079, "dur": 0.807, + "args": { + "External id": 227944,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096141710.539, "dur": 16722.881, + "args": { + "External id": 227945,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096141725.850, "dur": 16699.948, + "args": { + "External id": 227946,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096141748.541, "dur": 16.039, + "args": { + "External id": 227947,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096141768.115, "dur": 16625.753, + "args": { + "External id": 227948,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096141770.692, "dur": 16622.427, + "args": { + "External id": 227949,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096141776.406, "dur": 6.161, + "args": { + "External id": 227950,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096141784.637, "dur": 16605.412, + "args": { + "External id": 227951,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096158681.770, "dur": 31.955, + "args": { + "External id": 227952,"Sequence number": 959117, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5436 + } + }, + { + "ph": "s", "id": 59, "pid": 2070552, "tid": 2070552, "ts": 5327096158681.770, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096158699.718, "dur": 9.103, + "args": { + "External id": 227953,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096158703.464, "dur": 5.000, + "args": { + "External id": 227954,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096158779.160, "dur": 83.039, + "args": { + "External id": 227955,"Record function id": 0, "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096158863.909, "dur": 1099.641, + "args": { + "External id": 227956,"Record function id": 0, "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096158904.071, "dur": 1046.973, + "args": { + "External id": 227957,"Sequence number": 959118, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5441 + } + }, + { + "ph": "s", "id": 58, "pid": 2070552, "tid": 2070552, "ts": 5327096158904.071, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096158972.935, "dur": 66.546, + "args": { + "External id": 227958,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096159054.691, "dur": 105.484, + "args": { + "External id": 227959,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096159171.491, "dur": 38.009, + "args": { + "External id": 227960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096159218.241, "dur": 30.644, + "args": { + "External id": 227961,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096159274.637, "dur": 26.742, + "args": { + "External id": 227962,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096159319.587, "dur": 15.060, + "args": { + "External id": 227963,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096159354.814, "dur": 127.986, + "args": { + "External id": 227964,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096159403.775, "dur": 10.645, + "args": { + "External id": 227965,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096159408.617, "dur": 4.955, + "args": { + "External id": 227966,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096159416.937, "dur": 5.282, + "args": { + "External id": 227967,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096159423.555, "dur": 1.238, + "args": { + "External id": 227968,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096159427.259, "dur": 4.663, + "args": { + "External id": 227969,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096159493.437, "dur": 45.817, + "args": { + "External id": 227970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096159570.717, "dur": 26.890, + "args": { + "External id": 227971,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096159607.418, "dur": 87.195, + "args": { + "External id": 227972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096159714.091, "dur": 37.726, + "args": { + "External id": 227973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096159779.750, "dur": 25.538, + "args": { + "External id": 227974,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096159813.639, "dur": 34.445, + "args": { + "External id": 227975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096159865.707, "dur": 17.929, + "args": { + "External id": 227976,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5327096160046.761, "dur": 79.210, + "args": { + "External id": 227977,"Record function id": 0, "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096160200.970, "dur": 45.621, + "args": { + "External id": 227978,"Record function id": 0, "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5327096160255.662, "dur": 18062.822, + "args": { + "External id": 227979,"Record function id": 0, "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5327096160264.251, "dur": 895.778, + "args": { + "External id": 227980,"Record function id": 0, "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096160347.265, "dur": 8.622, + "args": { + "External id": 227981,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096160370.120, "dur": 35.708, + "args": { + "External id": 227982,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160375.021, "dur": 2.396, + "args": { + "External id": 227983,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160381.901, "dur": 0.374, + "args": { + "External id": 227984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160383.847, "dur": 0.732, + "args": { + "External id": 227985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160385.950, "dur": 0.515, + "args": { + "External id": 227986,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160389.218, "dur": 0.555, + "args": { + "External id": 227987,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160390.895, "dur": 0.388, + "args": { + "External id": 227988,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160392.457, "dur": 3.968, + "args": { + "External id": 227989,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160397.297, "dur": 0.508, + "args": { + "External id": 227990,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160399.179, "dur": 0.340, + "args": { + "External id": 227991,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096160416.042, "dur": 42.212, + "args": { + "External id": 227992,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096160490.377, "dur": 111.317, + "args": { + "External id": 227993,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096160502.032, "dur": 3.655, + "args": { + "External id": 227994,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096160510.737, "dur": 12.124, + "args": { + "External id": 227995,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096160515.332, "dur": 7.059, + "args": { + "External id": 227996,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160520.473, "dur": 0.535, + "args": { + "External id": 227997,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096160529.824, "dur": 27.729, + "args": { + "External id": 227998,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160531.457, "dur": 2.272, + "args": { + "External id": 227999,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160534.703, "dur": 0.608, + "args": { + "External id": 228000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160536.512, "dur": 0.581, + "args": { + "External id": 228001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160540.009, "dur": 2.125, + "args": { + "External id": 228002,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160543.257, "dur": 0.463, + "args": { + "External id": 228003,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160544.893, "dur": 0.140, + "args": { + "External id": 228004,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160548.422, "dur": 0.514, + "args": { + "External id": 228005,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160549.830, "dur": 0.431, + "args": { + "External id": 228006,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096160551.473, "dur": 2.313, + "args": { + "External id": 228007,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096160569.475, "dur": 24.671, + "args": { + "External id": 228008,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096160699.124, "dur": 365.462, + "args": { + "External id": 228009,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096160732.941, "dur": 326.542, + "args": { + "External id": 228010,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5494, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096160743.424, "dur": 310.215, + "args": { + "External id": 228011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096161089.150, "dur": 2.253, + "args": { + "External id": 228012,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5496, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5327096161181.105, "dur": 16938.272, + "args": { + "External id": 228013,"Record function id": 0, "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161279.596, "dur": 6.381, + "args": { + "External id": 228014,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161290.427, "dur": 1.438, + "args": { + "External id": 228015,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161293.774, "dur": 3.050, + "args": { + "External id": 228016,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161298.593, "dur": 0.911, + "args": { + "External id": 228017,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161300.856, "dur": 1.058, + "args": { + "External id": 228018,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161303.230, "dur": 0.661, + "args": { + "External id": 228019,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161307.647, "dur": 0.864, + "args": { + "External id": 228020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161310.258, "dur": 1.759, + "args": { + "External id": 228021,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161313.543, "dur": 0.893, + "args": { + "External id": 228022,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096161315.639, "dur": 0.771, + "args": { + "External id": 228023,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096161335.192, "dur": 16736.313, + "args": { + "External id": 228024,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096161350.016, "dur": 16713.859, + "args": { + "External id": 228025,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096161369.946, "dur": 15.933, + "args": { + "External id": 228026,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096161389.586, "dur": 16640.818, + "args": { + "External id": 228027,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096161392.273, "dur": 16637.449, + "args": { + "External id": 228028,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096161398.237, "dur": 6.549, + "args": { + "External id": 228029,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096161406.593, "dur": 16619.930, + "args": { + "External id": 228030,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096178263.185, "dur": 30.339, + "args": { + "External id": 228031,"Sequence number": 959119, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5515 + } + }, + { + "ph": "s", "id": 57, "pid": 2070552, "tid": 2070552, "ts": 5327096178263.185, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096178279.922, "dur": 8.978, + "args": { + "External id": 228032,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096178283.520, "dur": 5.153, + "args": { + "External id": 228033,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096178357.157, "dur": 82.490, + "args": { + "External id": 228034,"Record function id": 0, "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096178441.481, "dur": 1056.132, + "args": { + "External id": 228035,"Record function id": 0, "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096178483.445, "dur": 1000.968, + "args": { + "External id": 228036,"Sequence number": 959120, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5520 + } + }, + { + "ph": "s", "id": 56, "pid": 2070552, "tid": 2070552, "ts": 5327096178483.445, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096178548.864, "dur": 42.238, + "args": { + "External id": 228037,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096178603.671, "dur": 134.649, + "args": { + "External id": 228038,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096178752.614, "dur": 39.114, + "args": { + "External id": 228039,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096178800.126, "dur": 31.123, + "args": { + "External id": 228040,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096178857.352, "dur": 25.888, + "args": { + "External id": 228041,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096178900.026, "dur": 13.960, + "args": { + "External id": 228042,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096178934.444, "dur": 148.379, + "args": { + "External id": 228043,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096179000.560, "dur": 12.576, + "args": { + "External id": 228044,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179006.129, "dur": 5.947, + "args": { + "External id": 228045,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096179015.716, "dur": 5.287, + "args": { + "External id": 228046,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096179022.485, "dur": 1.019, + "args": { + "External id": 228047,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096179025.798, "dur": 5.857, + "args": { + "External id": 228048,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096179093.742, "dur": 48.069, + "args": { + "External id": 228049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096179171.472, "dur": 27.967, + "args": { + "External id": 228050,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096179209.222, "dur": 40.781, + "args": { + "External id": 228051,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096179257.829, "dur": 34.121, + "args": { + "External id": 228052,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096179314.265, "dur": 25.588, + "args": { + "External id": 228053,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096179345.267, "dur": 33.588, + "args": { + "External id": 228054,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096179397.483, "dur": 16.786, + "args": { + "External id": 228055,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5327096179560.518, "dur": 117.777, + "args": { + "External id": 228056,"Record function id": 0, "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096179755.966, "dur": 46.236, + "args": { + "External id": 228057,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5327096179811.290, "dur": 18061.437, + "args": { + "External id": 228058,"Record function id": 0, "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5327096179819.830, "dur": 780.047, + "args": { + "External id": 228059,"Record function id": 0, "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096179900.141, "dur": 8.841, + "args": { + "External id": 228060,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096179923.403, "dur": 35.139, + "args": { + "External id": 228061,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179928.601, "dur": 2.040, + "args": { + "External id": 228062,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179934.690, "dur": 0.474, + "args": { + "External id": 228063,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179936.285, "dur": 0.620, + "args": { + "External id": 228064,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179938.067, "dur": 0.688, + "args": { + "External id": 228065,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179941.747, "dur": 0.973, + "args": { + "External id": 228066,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179943.904, "dur": 0.459, + "args": { + "External id": 228067,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179945.141, "dur": 3.951, + "args": { + "External id": 228068,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179950.532, "dur": 0.575, + "args": { + "External id": 228069,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096179952.131, "dur": 0.522, + "args": { + "External id": 228070,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096179968.731, "dur": 59.822, + "args": { + "External id": 228071,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096180064.186, "dur": 113.136, + "args": { + "External id": 228072,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096180077.062, "dur": 5.008, + "args": { + "External id": 228073,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096180087.367, "dur": 10.805, + "args": { + "External id": 228074,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096180091.930, "dur": 5.814, + "args": { + "External id": 228075,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180095.306, "dur": 0.786, + "args": { + "External id": 228076,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096180105.276, "dur": 28.798, + "args": { + "External id": 228077,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180106.850, "dur": 2.299, + "args": { + "External id": 228078,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180110.545, "dur": 0.813, + "args": { + "External id": 228079,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180112.413, "dur": 0.527, + "args": { + "External id": 228080,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180115.791, "dur": 2.123, + "args": { + "External id": 228081,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180118.800, "dur": 0.499, + "args": { + "External id": 228082,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180120.399, "dur": 0.495, + "args": { + "External id": 228083,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180123.607, "dur": 1.079, + "args": { + "External id": 228084,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180125.963, "dur": 0.401, + "args": { + "External id": 228085,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180127.264, "dur": 2.390, + "args": { + "External id": 228086,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096180144.563, "dur": 25.067, + "args": { + "External id": 228087,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096180229.270, "dur": 288.104, + "args": { + "External id": 228088,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096180262.916, "dur": 250.365, + "args": { + "External id": 228089,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5573, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096180273.298, "dur": 235.390, + "args": { + "External id": 228090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096180538.937, "dur": 2.337, + "args": { + "External id": 228091,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5575, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5327096180618.797, "dur": 17045.181, + "args": { + "External id": 228092,"Record function id": 0, "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180762.396, "dur": 6.009, + "args": { + "External id": 228093,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180771.987, "dur": 1.149, + "args": { + "External id": 228094,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180774.695, "dur": 2.747, + "args": { + "External id": 228095,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180779.165, "dur": 0.753, + "args": { + "External id": 228096,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180781.419, "dur": 1.019, + "args": { + "External id": 228097,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180784.538, "dur": 0.888, + "args": { + "External id": 228098,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180789.389, "dur": 0.700, + "args": { + "External id": 228099,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180791.592, "dur": 1.924, + "args": { + "External id": 228100,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180795.067, "dur": 0.844, + "args": { + "External id": 228101,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096180797.317, "dur": 0.872, + "args": { + "External id": 228102,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096180818.842, "dur": 16774.246, + "args": { + "External id": 228103,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096180833.930, "dur": 16751.335, + "args": { + "External id": 228104,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096180854.440, "dur": 15.844, + "args": { + "External id": 228105,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096180873.546, "dur": 16679.287, + "args": { + "External id": 228106,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096180876.253, "dur": 16675.975, + "args": { + "External id": 228107,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096180882.062, "dur": 5.222, + "args": { + "External id": 228108,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096180888.858, "dur": 16660.382, + "args": { + "External id": 228109,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096197809.846, "dur": 36.355, + "args": { + "External id": 228110,"Sequence number": 959121, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5594 + } + }, + { + "ph": "s", "id": 55, "pid": 2070552, "tid": 2070552, "ts": 5327096197809.846, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096197832.623, "dur": 9.006, + "args": { + "External id": 228111,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096197836.142, "dur": 5.148, + "args": { + "External id": 228112,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096197911.108, "dur": 93.254, + "args": { + "External id": 228113,"Record function id": 0, "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096198008.053, "dur": 1072.596, + "args": { + "External id": 228114,"Record function id": 0, "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096198050.071, "dur": 1017.227, + "args": { + "External id": 228115,"Sequence number": 959122, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5599 + } + }, + { + "ph": "s", "id": 54, "pid": 2070552, "tid": 2070552, "ts": 5327096198050.071, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096198117.927, "dur": 45.075, + "args": { + "External id": 228116,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096198175.959, "dur": 105.203, + "args": { + "External id": 228117,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096198292.846, "dur": 37.167, + "args": { + "External id": 228118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096198338.177, "dur": 30.962, + "args": { + "External id": 228119,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096198392.950, "dur": 23.133, + "args": { + "External id": 228120,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096198433.789, "dur": 13.900, + "args": { + "External id": 228121,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096198468.091, "dur": 127.740, + "args": { + "External id": 228122,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096198520.078, "dur": 10.870, + "args": { + "External id": 228123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096198525.104, "dur": 5.063, + "args": { + "External id": 228124,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096198533.296, "dur": 4.902, + "args": { + "External id": 228125,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096198539.337, "dur": 0.997, + "args": { + "External id": 228126,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096198542.709, "dur": 4.578, + "args": { + "External id": 228127,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096198606.996, "dur": 87.264, + "args": { + "External id": 228128,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096198731.198, "dur": 29.443, + "args": { + "External id": 228129,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096198769.773, "dur": 43.252, + "args": { + "External id": 228130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096198823.051, "dur": 34.886, + "args": { + "External id": 228131,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096198878.214, "dur": 25.453, + "args": { + "External id": 228132,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096198909.219, "dur": 33.460, + "args": { + "External id": 228133,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096198963.429, "dur": 34.438, + "args": { + "External id": 228134,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5327096199144.272, "dur": 76.059, + "args": { + "External id": 228135,"Record function id": 0, "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096199294.431, "dur": 47.163, + "args": { + "External id": 228136,"Record function id": 0, "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5327096199351.172, "dur": 17860.073, + "args": { + "External id": 228137,"Record function id": 0, "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5327096199359.414, "dur": 846.575, + "args": { + "External id": 228138,"Record function id": 0, "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096199440.107, "dur": 7.345, + "args": { + "External id": 228139,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096199461.245, "dur": 38.087, + "args": { + "External id": 228140,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199467.129, "dur": 2.153, + "args": { + "External id": 228141,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199474.088, "dur": 0.391, + "args": { + "External id": 228142,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199475.764, "dur": 0.530, + "args": { + "External id": 228143,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199478.300, "dur": 0.598, + "args": { + "External id": 228144,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199482.238, "dur": 0.691, + "args": { + "External id": 228145,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199484.125, "dur": 0.768, + "args": { + "External id": 228146,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199486.193, "dur": 3.833, + "args": { + "External id": 228147,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199491.524, "dur": 0.427, + "args": { + "External id": 228148,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199493.500, "dur": 0.364, + "args": { + "External id": 228149,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096199510.356, "dur": 40.869, + "args": { + "External id": 228150,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096199583.531, "dur": 169.886, + "args": { + "External id": 228151,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096199594.611, "dur": 3.879, + "args": { + "External id": 228152,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096199603.672, "dur": 10.792, + "args": { + "External id": 228153,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096199608.322, "dur": 5.708, + "args": { + "External id": 228154,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199612.103, "dur": 0.727, + "args": { + "External id": 228155,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096199663.786, "dur": 36.219, + "args": { + "External id": 228156,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199666.627, "dur": 2.774, + "args": { + "External id": 228157,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199671.178, "dur": 0.480, + "args": { + "External id": 228158,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199673.276, "dur": 0.471, + "args": { + "External id": 228159,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199676.940, "dur": 2.082, + "args": { + "External id": 228160,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199683.174, "dur": 0.571, + "args": { + "External id": 228161,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199685.442, "dur": 0.765, + "args": { + "External id": 228162,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199689.625, "dur": 0.599, + "args": { + "External id": 228163,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199691.566, "dur": 0.413, + "args": { + "External id": 228164,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096199693.450, "dur": 2.297, + "args": { + "External id": 228165,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096199714.422, "dur": 30.910, + "args": { + "External id": 228166,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096199808.453, "dur": 302.957, + "args": { + "External id": 228167,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096199841.998, "dur": 264.783, + "args": { + "External id": 228168,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5652, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096199852.232, "dur": 248.899, + "args": { + "External id": 228169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096200136.162, "dur": 2.222, + "args": { + "External id": 228170,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5654, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5327096200227.877, "dur": 16766.796, + "args": { + "External id": 228171,"Record function id": 0, "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200328.100, "dur": 5.974, + "args": { + "External id": 228172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200337.883, "dur": 1.264, + "args": { + "External id": 228173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200341.109, "dur": 3.182, + "args": { + "External id": 228174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200346.201, "dur": 1.072, + "args": { + "External id": 228175,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200348.662, "dur": 1.007, + "args": { + "External id": 228176,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200350.877, "dur": 0.994, + "args": { + "External id": 228177,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200355.191, "dur": 1.019, + "args": { + "External id": 228178,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200357.673, "dur": 1.695, + "args": { + "External id": 228179,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200360.946, "dur": 0.929, + "args": { + "External id": 228180,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096200363.580, "dur": 0.659, + "args": { + "External id": 228181,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096200385.531, "dur": 16546.009, + "args": { + "External id": 228182,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096200401.016, "dur": 16522.359, + "args": { + "External id": 228183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096200422.536, "dur": 15.749, + "args": { + "External id": 228184,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096200441.822, "dur": 16445.885, + "args": { + "External id": 228185,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096200444.060, "dur": 16442.863, + "args": { + "External id": 228186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096200450.134, "dur": 6.393, + "args": { + "External id": 228187,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096200458.160, "dur": 16425.410, + "args": { + "External id": 228188,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096217144.964, "dur": 39.441, + "args": { + "External id": 228189,"Sequence number": 959123, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5673 + } + }, + { + "ph": "s", "id": 53, "pid": 2070552, "tid": 2070552, "ts": 5327096217144.964, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096217169.892, "dur": 9.701, + "args": { + "External id": 228190,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096217173.904, "dur": 5.395, + "args": { + "External id": 228191,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096217250.073, "dur": 84.266, + "args": { + "External id": 228192,"Record function id": 0, "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096217335.984, "dur": 1091.513, + "args": { + "External id": 228193,"Record function id": 0, "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096217375.747, "dur": 1038.020, + "args": { + "External id": 228194,"Sequence number": 959124, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5678 + } + }, + { + "ph": "s", "id": 52, "pid": 2070552, "tid": 2070552, "ts": 5327096217375.747, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096217443.623, "dur": 43.861, + "args": { + "External id": 228195,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096217500.522, "dur": 107.010, + "args": { + "External id": 228196,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096217617.997, "dur": 83.783, + "args": { + "External id": 228197,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096217714.327, "dur": 31.269, + "args": { + "External id": 228198,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096217774.047, "dur": 25.606, + "args": { + "External id": 228199,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096217817.744, "dur": 14.833, + "args": { + "External id": 228200,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096217858.943, "dur": 146.156, + "args": { + "External id": 228201,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096217909.284, "dur": 10.958, + "args": { + "External id": 228202,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096217914.213, "dur": 5.156, + "args": { + "External id": 228203,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096217923.209, "dur": 6.420, + "args": { + "External id": 228204,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096217931.287, "dur": 1.021, + "args": { + "External id": 228205,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096217934.781, "dur": 4.295, + "args": { + "External id": 228206,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096218018.269, "dur": 50.567, + "args": { + "External id": 228207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096218102.509, "dur": 29.033, + "args": { + "External id": 228208,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096218140.523, "dur": 40.725, + "args": { + "External id": 228209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096218189.110, "dur": 34.806, + "args": { + "External id": 228210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096218245.878, "dur": 24.888, + "args": { + "External id": 228211,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096218276.419, "dur": 34.205, + "args": { + "External id": 228212,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096218332.210, "dur": 16.752, + "args": { + "External id": 228213,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5327096218491.525, "dur": 76.294, + "args": { + "External id": 228214,"Record function id": 0, "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096218686.163, "dur": 50.036, + "args": { + "External id": 228215,"Record function id": 0, "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5327096218746.845, "dur": 17761.078, + "args": { + "External id": 228216,"Record function id": 0, "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5327096218756.028, "dur": 806.420, + "args": { + "External id": 228217,"Record function id": 0, "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096218841.274, "dur": 8.959, + "args": { + "External id": 228218,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096218864.249, "dur": 38.680, + "args": { + "External id": 228219,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218869.923, "dur": 2.028, + "args": { + "External id": 228220,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218876.714, "dur": 0.418, + "args": { + "External id": 228221,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218878.591, "dur": 0.515, + "args": { + "External id": 228222,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218881.006, "dur": 0.414, + "args": { + "External id": 228223,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218884.469, "dur": 0.729, + "args": { + "External id": 228224,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218886.777, "dur": 0.912, + "args": { + "External id": 228225,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218888.990, "dur": 4.045, + "args": { + "External id": 228226,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218894.731, "dur": 0.778, + "args": { + "External id": 228227,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096218896.947, "dur": 0.338, + "args": { + "External id": 228228,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096218914.008, "dur": 46.802, + "args": { + "External id": 228229,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096219016.688, "dur": 120.710, + "args": { + "External id": 228230,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096219029.109, "dur": 5.132, + "args": { + "External id": 228231,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096219039.392, "dur": 11.771, + "args": { + "External id": 228232,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096219043.708, "dur": 7.006, + "args": { + "External id": 228233,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219047.743, "dur": 1.266, + "args": { + "External id": 228234,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096219058.745, "dur": 31.940, + "args": { + "External id": 228235,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219061.366, "dur": 2.818, + "args": { + "External id": 228236,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219065.710, "dur": 0.864, + "args": { + "External id": 228237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219068.068, "dur": 0.373, + "args": { + "External id": 228238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219071.997, "dur": 2.115, + "args": { + "External id": 228239,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219075.481, "dur": 0.359, + "args": { + "External id": 228240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219077.480, "dur": 0.360, + "args": { + "External id": 228241,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219080.897, "dur": 0.515, + "args": { + "External id": 228242,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219082.641, "dur": 0.360, + "args": { + "External id": 228243,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219084.103, "dur": 2.158, + "args": { + "External id": 228244,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096219101.745, "dur": 27.672, + "args": { + "External id": 228245,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096219191.007, "dur": 287.001, + "args": { + "External id": 228246,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096219223.336, "dur": 250.146, + "args": { + "External id": 228247,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5731, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096219233.262, "dur": 234.692, + "args": { + "External id": 228248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096219499.814, "dur": 2.503, + "args": { + "External id": 228249,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5733, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5327096219583.339, "dur": 16725.650, + "args": { + "External id": 228250,"Record function id": 0, "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219720.140, "dur": 6.520, + "args": { + "External id": 228251,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219730.346, "dur": 1.112, + "args": { + "External id": 228252,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219733.360, "dur": 3.288, + "args": { + "External id": 228253,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219738.323, "dur": 0.982, + "args": { + "External id": 228254,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219740.522, "dur": 1.169, + "args": { + "External id": 228255,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219743.026, "dur": 0.986, + "args": { + "External id": 228256,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219747.452, "dur": 1.138, + "args": { + "External id": 228257,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219749.965, "dur": 1.991, + "args": { + "External id": 228258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219753.578, "dur": 0.833, + "args": { + "External id": 228259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096219756.003, "dur": 0.735, + "args": { + "External id": 228260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096219778.095, "dur": 16489.090, + "args": { + "External id": 228261,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096219793.623, "dur": 16465.941, + "args": { + "External id": 228262,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096219817.269, "dur": 15.367, + "args": { + "External id": 228263,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096219836.350, "dur": 16389.784, + "args": { + "External id": 228264,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096219839.180, "dur": 16386.297, + "args": { + "External id": 228265,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096219845.591, "dur": 5.382, + "args": { + "External id": 228266,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096219852.677, "dur": 16369.867, + "args": { + "External id": 228267,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096236448.805, "dur": 35.210, + "args": { + "External id": 228268,"Sequence number": 959125, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5752 + } + }, + { + "ph": "s", "id": 51, "pid": 2070552, "tid": 2070552, "ts": 5327096236448.805, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096236470.450, "dur": 8.996, + "args": { + "External id": 228269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096236474.188, "dur": 5.040, + "args": { + "External id": 228270,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096236545.301, "dur": 131.313, + "args": { + "External id": 228271,"Record function id": 0, "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096236679.692, "dur": 1086.996, + "args": { + "External id": 228272,"Record function id": 0, "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096236722.729, "dur": 1029.934, + "args": { + "External id": 228273,"Sequence number": 959126, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5757 + } + }, + { + "ph": "s", "id": 50, "pid": 2070552, "tid": 2070552, "ts": 5327096236722.729, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096236790.538, "dur": 46.684, + "args": { + "External id": 228274,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096236850.469, "dur": 105.251, + "args": { + "External id": 228275,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096236967.398, "dur": 55.707, + "args": { + "External id": 228276,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096237034.130, "dur": 32.467, + "args": { + "External id": 228277,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096237098.231, "dur": 28.203, + "args": { + "External id": 228278,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096237143.371, "dur": 13.958, + "args": { + "External id": 228279,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096237177.175, "dur": 131.302, + "args": { + "External id": 228280,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096237229.273, "dur": 11.202, + "args": { + "External id": 228281,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096237234.693, "dur": 5.150, + "args": { + "External id": 228282,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096237243.363, "dur": 5.476, + "args": { + "External id": 228283,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096237250.062, "dur": 1.086, + "args": { + "External id": 228284,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096237253.753, "dur": 5.276, + "args": { + "External id": 228285,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096237319.360, "dur": 47.948, + "args": { + "External id": 228286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096237399.349, "dur": 27.967, + "args": { + "External id": 228287,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096237436.943, "dur": 39.896, + "args": { + "External id": 228288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096237483.120, "dur": 34.563, + "args": { + "External id": 228289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096237541.619, "dur": 24.594, + "args": { + "External id": 228290,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096237574.464, "dur": 33.274, + "args": { + "External id": 228291,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096237662.350, "dur": 21.321, + "args": { + "External id": 228292,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5327096237832.626, "dur": 78.378, + "args": { + "External id": 228293,"Record function id": 0, "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096238000.592, "dur": 50.835, + "args": { + "External id": 228294,"Record function id": 0, "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5327096238061.818, "dur": 18027.018, + "args": { + "External id": 228295,"Record function id": 0, "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5327096238070.641, "dur": 837.035, + "args": { + "External id": 228296,"Record function id": 0, "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096238154.755, "dur": 8.997, + "args": { + "External id": 228297,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096238178.081, "dur": 39.140, + "args": { + "External id": 228298,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238183.677, "dur": 2.224, + "args": { + "External id": 228299,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238190.747, "dur": 0.778, + "args": { + "External id": 228300,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238193.181, "dur": 0.357, + "args": { + "External id": 228301,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238195.494, "dur": 0.789, + "args": { + "External id": 228302,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238199.212, "dur": 0.326, + "args": { + "External id": 228303,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238201.143, "dur": 0.731, + "args": { + "External id": 228304,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238203.220, "dur": 3.784, + "args": { + "External id": 228305,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238208.617, "dur": 0.544, + "args": { + "External id": 228306,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238210.354, "dur": 0.539, + "args": { + "External id": 228307,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096238228.234, "dur": 41.072, + "args": { + "External id": 228308,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096238302.310, "dur": 119.164, + "args": { + "External id": 228309,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096238314.116, "dur": 4.157, + "args": { + "External id": 228310,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096238323.224, "dur": 10.731, + "args": { + "External id": 228311,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096238327.785, "dur": 5.742, + "args": { + "External id": 228312,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238331.420, "dur": 0.705, + "args": { + "External id": 228313,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096238341.146, "dur": 32.157, + "args": { + "External id": 228314,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238343.434, "dur": 2.244, + "args": { + "External id": 228315,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238347.056, "dur": 0.604, + "args": { + "External id": 228316,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238349.193, "dur": 0.405, + "args": { + "External id": 228317,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238353.849, "dur": 1.776, + "args": { + "External id": 228318,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238356.909, "dur": 0.242, + "args": { + "External id": 228319,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238358.278, "dur": 0.712, + "args": { + "External id": 228320,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238362.264, "dur": 0.502, + "args": { + "External id": 228321,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238364.134, "dur": 1.008, + "args": { + "External id": 228322,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096238366.442, "dur": 2.405, + "args": { + "External id": 228323,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096238388.965, "dur": 24.579, + "args": { + "External id": 228324,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096238473.018, "dur": 339.336, + "args": { + "External id": 228325,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096238514.963, "dur": 292.341, + "args": { + "External id": 228326,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5810, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096238525.226, "dur": 274.279, + "args": { + "External id": 228327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096238838.656, "dur": 2.442, + "args": { + "External id": 228328,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5812, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5327096238928.309, "dur": 16932.050, + "args": { + "External id": 228329,"Record function id": 0, "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239047.842, "dur": 7.207, + "args": { + "External id": 228330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239058.799, "dur": 1.163, + "args": { + "External id": 228331,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239061.728, "dur": 3.123, + "args": { + "External id": 228332,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239066.314, "dur": 1.354, + "args": { + "External id": 228333,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239068.918, "dur": 1.198, + "args": { + "External id": 228334,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239073.366, "dur": 0.661, + "args": { + "External id": 228335,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239075.468, "dur": 1.232, + "args": { + "External id": 228336,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239078.276, "dur": 1.916, + "args": { + "External id": 228337,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239081.767, "dur": 0.968, + "args": { + "External id": 228338,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096239086.013, "dur": 0.639, + "args": { + "External id": 228339,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096239105.693, "dur": 16710.896, + "args": { + "External id": 228340,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096239122.003, "dur": 16686.835, + "args": { + "External id": 228341,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096239142.260, "dur": 14.483, + "args": { + "External id": 228342,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096239160.307, "dur": 16611.778, + "args": { + "External id": 228343,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096239162.991, "dur": 16608.347, + "args": { + "External id": 228344,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096239169.525, "dur": 6.897, + "args": { + "External id": 228345,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096239178.109, "dur": 16590.265, + "args": { + "External id": 228346,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096256019.766, "dur": 39.716, + "args": { + "External id": 228347,"Sequence number": 959127, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5831 + } + }, + { + "ph": "s", "id": 49, "pid": 2070552, "tid": 2070552, "ts": 5327096256019.766, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096256044.678, "dur": 10.052, + "args": { + "External id": 228348,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096256048.977, "dur": 5.361, + "args": { + "External id": 228349,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096256127.480, "dur": 85.907, + "args": { + "External id": 228350,"Record function id": 0, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096256215.328, "dur": 1102.148, + "args": { + "External id": 228351,"Record function id": 0, "Ev Idx": 5835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096256255.779, "dur": 1048.136, + "args": { + "External id": 228352,"Sequence number": 959128, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5836 + } + }, + { + "ph": "s", "id": 48, "pid": 2070552, "tid": 2070552, "ts": 5327096256255.779, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096256348.082, "dur": 46.360, + "args": { + "External id": 228353,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096256409.175, "dur": 104.897, + "args": { + "External id": 228354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096256522.968, "dur": 37.478, + "args": { + "External id": 228355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096256566.041, "dur": 30.854, + "args": { + "External id": 228356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096256666.114, "dur": 27.619, + "args": { + "External id": 228357,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096256711.239, "dur": 16.962, + "args": { + "External id": 228358,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096256747.246, "dur": 131.775, + "args": { + "External id": 228359,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096256798.165, "dur": 12.551, + "args": { + "External id": 228360,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096256803.748, "dur": 5.999, + "args": { + "External id": 228361,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096256813.616, "dur": 4.996, + "args": { + "External id": 228362,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096256820.065, "dur": 2.674, + "args": { + "External id": 228363,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096256825.125, "dur": 4.696, + "args": { + "External id": 228364,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096256890.124, "dur": 49.874, + "args": { + "External id": 228365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096256968.767, "dur": 44.698, + "args": { + "External id": 228366,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096257024.807, "dur": 44.425, + "args": { + "External id": 228367,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096257076.150, "dur": 33.912, + "args": { + "External id": 228368,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096257135.318, "dur": 24.776, + "args": { + "External id": 228369,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096257166.070, "dur": 33.907, + "args": { + "External id": 228370,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096257218.325, "dur": 19.952, + "args": { + "External id": 228371,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5327096257381.243, "dur": 77.691, + "args": { + "External id": 228372,"Record function id": 0, "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096257530.027, "dur": 47.280, + "args": { + "External id": 228373,"Record function id": 0, "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5327096257586.839, "dur": 18168.073, + "args": { + "External id": 228374,"Record function id": 0, "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5327096257595.281, "dur": 868.446, + "args": { + "External id": 228375,"Record function id": 0, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096257719.537, "dur": 9.724, + "args": { + "External id": 228376,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096257743.697, "dur": 37.418, + "args": { + "External id": 228377,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257749.195, "dur": 2.399, + "args": { + "External id": 228378,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257756.341, "dur": 0.218, + "args": { + "External id": 228379,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257758.062, "dur": 0.389, + "args": { + "External id": 228380,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257759.747, "dur": 0.516, + "args": { + "External id": 228381,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257763.776, "dur": 0.382, + "args": { + "External id": 228382,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257765.445, "dur": 0.473, + "args": { + "External id": 228383,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257767.533, "dur": 3.209, + "args": { + "External id": 228384,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257772.392, "dur": 0.279, + "args": { + "External id": 228385,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257774.184, "dur": 0.317, + "args": { + "External id": 228386,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096257791.634, "dur": 42.845, + "args": { + "External id": 228387,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096257868.728, "dur": 128.575, + "args": { + "External id": 228388,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096257881.141, "dur": 3.940, + "args": { + "External id": 228389,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096257890.200, "dur": 10.533, + "args": { + "External id": 228390,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096257894.860, "dur": 5.428, + "args": { + "External id": 228391,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257898.192, "dur": 0.743, + "args": { + "External id": 228392,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096257908.118, "dur": 30.216, + "args": { + "External id": 228393,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257910.755, "dur": 2.548, + "args": { + "External id": 228394,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257914.797, "dur": 0.455, + "args": { + "External id": 228395,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257916.544, "dur": 0.626, + "args": { + "External id": 228396,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257920.716, "dur": 1.508, + "args": { + "External id": 228397,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257923.593, "dur": 0.527, + "args": { + "External id": 228398,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257925.368, "dur": 0.158, + "args": { + "External id": 228399,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257929.364, "dur": 0.160, + "args": { + "External id": 228400,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257930.782, "dur": 0.151, + "args": { + "External id": 228401,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096257932.280, "dur": 1.675, + "args": { + "External id": 228402,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096257949.012, "dur": 23.609, + "args": { + "External id": 228403,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096258052.836, "dur": 323.486, + "args": { + "External id": 228404,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096258086.447, "dur": 285.136, + "args": { + "External id": 228405,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5889, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096258097.236, "dur": 269.162, + "args": { + "External id": 228406,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096258396.931, "dur": 2.328, + "args": { + "External id": 228407,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5891, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5327096258484.540, "dur": 17020.906, + "args": { + "External id": 228408,"Record function id": 0, "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258585.452, "dur": 5.835, + "args": { + "External id": 228409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258594.867, "dur": 1.108, + "args": { + "External id": 228410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258597.779, "dur": 2.126, + "args": { + "External id": 228411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258601.550, "dur": 1.160, + "args": { + "External id": 228412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258603.966, "dur": 0.957, + "args": { + "External id": 228413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258606.163, "dur": 0.631, + "args": { + "External id": 228414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258610.179, "dur": 0.527, + "args": { + "External id": 228415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258612.318, "dur": 1.585, + "args": { + "External id": 228416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258615.896, "dur": 0.727, + "args": { + "External id": 228417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096258618.227, "dur": 0.791, + "args": { + "External id": 228418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096258679.619, "dur": 16779.657, + "args": { + "External id": 228419,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096258696.627, "dur": 16754.467, + "args": { + "External id": 228420,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096258720.625, "dur": 15.430, + "args": { + "External id": 228421,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096258739.796, "dur": 16676.121, + "args": { + "External id": 228422,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096258742.475, "dur": 16672.812, + "args": { + "External id": 228423,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096258747.771, "dur": 5.902, + "args": { + "External id": 228424,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096258755.378, "dur": 16657.000, + "args": { + "External id": 228425,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096275688.899, "dur": 37.964, + "args": { + "External id": 228426,"Sequence number": 959129, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5910 + } + }, + { + "ph": "s", "id": 47, "pid": 2070552, "tid": 2070552, "ts": 5327096275688.899, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096275712.323, "dur": 9.557, + "args": { + "External id": 228427,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096275715.676, "dur": 5.783, + "args": { + "External id": 228428,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096275794.510, "dur": 81.866, + "args": { + "External id": 228429,"Record function id": 0, "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096275878.251, "dur": 1064.380, + "args": { + "External id": 228430,"Record function id": 0, "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096275920.051, "dur": 1009.462, + "args": { + "External id": 228431,"Sequence number": 959130, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5915 + } + }, + { + "ph": "s", "id": 46, "pid": 2070552, "tid": 2070552, "ts": 5327096275920.051, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096276006.925, "dur": 46.422, + "args": { + "External id": 228432,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096276069.679, "dur": 105.178, + "args": { + "External id": 228433,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096276184.203, "dur": 37.278, + "args": { + "External id": 228434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096276227.046, "dur": 30.336, + "args": { + "External id": 228435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096276285.248, "dur": 25.157, + "args": { + "External id": 228436,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096276325.181, "dur": 15.872, + "args": { + "External id": 228437,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096276358.518, "dur": 125.673, + "args": { + "External id": 228438,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096276406.494, "dur": 11.533, + "args": { + "External id": 228439,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096276411.745, "dur": 5.521, + "args": { + "External id": 228440,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096276420.852, "dur": 5.039, + "args": { + "External id": 228441,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096276427.253, "dur": 2.943, + "args": { + "External id": 228442,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096276432.580, "dur": 4.415, + "args": { + "External id": 228443,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096276495.131, "dur": 44.033, + "args": { + "External id": 228444,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096276567.631, "dur": 27.309, + "args": { + "External id": 228445,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096276603.176, "dur": 83.177, + "args": { + "External id": 228446,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096276697.794, "dur": 37.426, + "args": { + "External id": 228447,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096276760.724, "dur": 25.373, + "args": { + "External id": 228448,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096276791.861, "dur": 34.634, + "args": { + "External id": 228449,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096276844.093, "dur": 20.539, + "args": { + "External id": 228450,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5327096277021.368, "dur": 79.370, + "args": { + "External id": 228451,"Record function id": 0, "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096277174.672, "dur": 45.723, + "args": { + "External id": 228452,"Record function id": 0, "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5327096277229.340, "dur": 18083.400, + "args": { + "External id": 228453,"Record function id": 0, "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5327096277237.699, "dur": 842.394, + "args": { + "External id": 228454,"Record function id": 0, "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096277318.729, "dur": 9.223, + "args": { + "External id": 228455,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096277342.944, "dur": 37.855, + "args": { + "External id": 228456,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277348.569, "dur": 2.106, + "args": { + "External id": 228457,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277355.382, "dur": 0.259, + "args": { + "External id": 228458,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277357.520, "dur": 0.443, + "args": { + "External id": 228459,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277359.192, "dur": 0.425, + "args": { + "External id": 228460,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277362.813, "dur": 0.243, + "args": { + "External id": 228461,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277364.501, "dur": 0.470, + "args": { + "External id": 228462,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277366.974, "dur": 3.158, + "args": { + "External id": 228463,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277372.002, "dur": 0.527, + "args": { + "External id": 228464,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277374.123, "dur": 0.374, + "args": { + "External id": 228465,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096277391.944, "dur": 42.631, + "args": { + "External id": 228466,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096277466.622, "dur": 110.362, + "args": { + "External id": 228467,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096277478.233, "dur": 3.857, + "args": { + "External id": 228468,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096277487.158, "dur": 10.912, + "args": { + "External id": 228469,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096277491.690, "dur": 5.938, + "args": { + "External id": 228470,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277495.629, "dur": 0.555, + "args": { + "External id": 228471,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096277504.794, "dur": 28.309, + "args": { + "External id": 228472,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277506.869, "dur": 2.486, + "args": { + "External id": 228473,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277510.933, "dur": 0.327, + "args": { + "External id": 228474,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277512.541, "dur": 0.337, + "args": { + "External id": 228475,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277515.922, "dur": 1.505, + "args": { + "External id": 228476,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277518.780, "dur": 0.314, + "args": { + "External id": 228477,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277520.195, "dur": 0.317, + "args": { + "External id": 228478,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277524.040, "dur": 0.140, + "args": { + "External id": 228479,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277525.597, "dur": 0.191, + "args": { + "External id": 228480,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096277527.162, "dur": 1.998, + "args": { + "External id": 228481,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096277544.439, "dur": 24.052, + "args": { + "External id": 228482,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096277669.909, "dur": 295.909, + "args": { + "External id": 228483,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096277704.181, "dur": 257.006, + "args": { + "External id": 228484,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5968, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096277719.335, "dur": 236.264, + "args": { + "External id": 228485,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096278006.277, "dur": 3.131, + "args": { + "External id": 228486,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5970, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5327096278100.711, "dur": 17010.625, + "args": { + "External id": 228487,"Record function id": 0, "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278199.693, "dur": 6.489, + "args": { + "External id": 228488,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278209.615, "dur": 1.170, + "args": { + "External id": 228489,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278212.440, "dur": 1.988, + "args": { + "External id": 228490,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278216.118, "dur": 0.951, + "args": { + "External id": 228491,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278218.392, "dur": 0.896, + "args": { + "External id": 228492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278220.377, "dur": 0.712, + "args": { + "External id": 228493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278224.587, "dur": 0.743, + "args": { + "External id": 228494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278226.777, "dur": 1.948, + "args": { + "External id": 228495,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278230.172, "dur": 0.885, + "args": { + "External id": 228496,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096278232.351, "dur": 0.596, + "args": { + "External id": 228497,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096278252.286, "dur": 16814.423, + "args": { + "External id": 228498,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096278267.449, "dur": 16791.509, + "args": { + "External id": 228499,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096278287.049, "dur": 13.841, + "args": { + "External id": 228500,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096278304.440, "dur": 16718.932, + "args": { + "External id": 228501,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096278306.932, "dur": 16715.723, + "args": { + "External id": 228502,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096278312.368, "dur": 6.216, + "args": { + "External id": 228503,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096278320.134, "dur": 16699.082, + "args": { + "External id": 228504,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096295252.731, "dur": 36.413, + "args": { + "External id": 228505,"Sequence number": 959131, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5989 + } + }, + { + "ph": "s", "id": 45, "pid": 2070552, "tid": 2070552, "ts": 5327096295252.731, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096295275.583, "dur": 8.995, + "args": { + "External id": 228506,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096295279.443, "dur": 4.940, + "args": { + "External id": 228507,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096295350.635, "dur": 83.199, + "args": { + "External id": 228508,"Record function id": 0, "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096295435.427, "dur": 1064.015, + "args": { + "External id": 228509,"Record function id": 0, "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096295475.368, "dur": 1010.798, + "args": { + "External id": 228510,"Sequence number": 959132, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5994 + } + }, + { + "ph": "s", "id": 44, "pid": 2070552, "tid": 2070552, "ts": 5327096295475.368, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096295543.074, "dur": 42.368, + "args": { + "External id": 228511,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096295598.751, "dur": 133.977, + "args": { + "External id": 228512,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096295745.967, "dur": 40.685, + "args": { + "External id": 228513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096295795.322, "dur": 30.561, + "args": { + "External id": 228514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096295854.553, "dur": 26.677, + "args": { + "External id": 228515,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096295898.889, "dur": 14.302, + "args": { + "External id": 228516,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096295932.417, "dur": 143.998, + "args": { + "External id": 228517,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096295995.763, "dur": 12.447, + "args": { + "External id": 228518,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296001.188, "dur": 6.027, + "args": { + "External id": 228519,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096296010.910, "dur": 5.458, + "args": { + "External id": 228520,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096296017.819, "dur": 0.940, + "args": { + "External id": 228521,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096296021.194, "dur": 3.796, + "args": { + "External id": 228522,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096296088.316, "dur": 49.406, + "args": { + "External id": 228523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096296168.356, "dur": 30.217, + "args": { + "External id": 228524,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096296207.133, "dur": 41.641, + "args": { + "External id": 228525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096296257.028, "dur": 34.637, + "args": { + "External id": 228526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096296314.580, "dur": 25.208, + "args": { + "External id": 228527,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096296346.214, "dur": 34.169, + "args": { + "External id": 228528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096296399.387, "dur": 18.018, + "args": { + "External id": 228529,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6013 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5327096296561.756, "dur": 120.650, + "args": { + "External id": 228530,"Record function id": 0, "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096296761.008, "dur": 48.231, + "args": { + "External id": 228531,"Record function id": 0, "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5327096296818.577, "dur": 18173.508, + "args": { + "External id": 228532,"Record function id": 0, "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5327096296826.589, "dur": 776.819, + "args": { + "External id": 228533,"Record function id": 0, "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096296909.139, "dur": 9.108, + "args": { + "External id": 228534,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096296932.199, "dur": 35.330, + "args": { + "External id": 228535,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296937.685, "dur": 2.228, + "args": { + "External id": 228536,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296944.619, "dur": 0.225, + "args": { + "External id": 228537,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296946.228, "dur": 0.364, + "args": { + "External id": 228538,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296947.965, "dur": 0.369, + "args": { + "External id": 228539,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296951.475, "dur": 0.313, + "args": { + "External id": 228540,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296953.227, "dur": 0.251, + "args": { + "External id": 228541,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296954.687, "dur": 3.381, + "args": { + "External id": 228542,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296959.367, "dur": 0.183, + "args": { + "External id": 228543,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096296961.037, "dur": 0.160, + "args": { + "External id": 228544,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096296993.818, "dur": 43.411, + "args": { + "External id": 228545,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096297072.249, "dur": 112.094, + "args": { + "External id": 228546,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096297083.729, "dur": 4.906, + "args": { + "External id": 228547,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096297094.015, "dur": 11.164, + "args": { + "External id": 228548,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096297098.468, "dur": 6.257, + "args": { + "External id": 228549,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297102.475, "dur": 0.671, + "args": { + "External id": 228550,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096297112.676, "dur": 28.507, + "args": { + "External id": 228551,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297115.351, "dur": 2.656, + "args": { + "External id": 228552,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297119.439, "dur": 0.322, + "args": { + "External id": 228553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297120.873, "dur": 0.421, + "args": { + "External id": 228554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297124.257, "dur": 1.462, + "args": { + "External id": 228555,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297127.335, "dur": 0.411, + "args": { + "External id": 228556,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297128.938, "dur": 0.433, + "args": { + "External id": 228557,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297132.213, "dur": 0.191, + "args": { + "External id": 228558,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297133.634, "dur": 0.261, + "args": { + "External id": 228559,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297135.099, "dur": 2.041, + "args": { + "External id": 228560,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096297151.918, "dur": 23.719, + "args": { + "External id": 228561,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096297236.805, "dur": 281.565, + "args": { + "External id": 228562,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096297271.493, "dur": 242.470, + "args": { + "External id": 228563,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6047, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096297281.333, "dur": 227.725, + "args": { + "External id": 228564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096297540.621, "dur": 2.245, + "args": { + "External id": 228565,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6049, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5327096297664.881, "dur": 17107.335, + "args": { + "External id": 228566,"Record function id": 0, "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297770.594, "dur": 6.540, + "args": { + "External id": 228567,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297780.974, "dur": 0.998, + "args": { + "External id": 228568,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297783.696, "dur": 1.896, + "args": { + "External id": 228569,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297787.246, "dur": 0.886, + "args": { + "External id": 228570,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297789.565, "dur": 0.879, + "args": { + "External id": 228571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297791.593, "dur": 0.668, + "args": { + "External id": 228572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297795.241, "dur": 0.842, + "args": { + "External id": 228573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297797.419, "dur": 2.070, + "args": { + "External id": 228574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297801.127, "dur": 0.685, + "args": { + "External id": 228575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096297803.146, "dur": 0.653, + "args": { + "External id": 228576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096297824.302, "dur": 16904.943, + "args": { + "External id": 228577,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096297839.675, "dur": 16882.212, + "args": { + "External id": 228578,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096297862.580, "dur": 15.252, + "args": { + "External id": 228579,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096297881.332, "dur": 16806.301, + "args": { + "External id": 228580,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096297883.874, "dur": 16802.968, + "args": { + "External id": 228581,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096297889.417, "dur": 5.591, + "args": { + "External id": 228582,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096297896.578, "dur": 16787.483, + "args": { + "External id": 228583,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096314911.248, "dur": 41.659, + "args": { + "External id": 228584,"Sequence number": 959133, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6068 + } + }, + { + "ph": "s", "id": 43, "pid": 2070552, "tid": 2070552, "ts": 5327096314911.248, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096314939.206, "dur": 8.775, + "args": { + "External id": 228585,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096314942.731, "dur": 5.063, + "args": { + "External id": 228586,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096315032.318, "dur": 83.015, + "args": { + "External id": 228587,"Record function id": 0, "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096315117.012, "dur": 1071.655, + "args": { + "External id": 228588,"Record function id": 0, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096315158.486, "dur": 1016.730, + "args": { + "External id": 228589,"Sequence number": 959134, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6073 + } + }, + { + "ph": "s", "id": 42, "pid": 2070552, "tid": 2070552, "ts": 5327096315158.486, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096315225.933, "dur": 43.132, + "args": { + "External id": 228590,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096315280.988, "dur": 105.701, + "args": { + "External id": 228591,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096315398.327, "dur": 38.094, + "args": { + "External id": 228592,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096315442.861, "dur": 30.946, + "args": { + "External id": 228593,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096315500.373, "dur": 23.695, + "args": { + "External id": 228594,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096315538.952, "dur": 13.269, + "args": { + "External id": 228595,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096315570.302, "dur": 172.922, + "args": { + "External id": 228596,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096315616.482, "dur": 53.192, + "args": { + "External id": 228597,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096315661.797, "dur": 6.621, + "args": { + "External id": 228598,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096315672.745, "dur": 5.431, + "args": { + "External id": 228599,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096315679.664, "dur": 0.948, + "args": { + "External id": 228600,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096315684.834, "dur": 3.801, + "args": { + "External id": 228601,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096315755.244, "dur": 53.591, + "args": { + "External id": 228602,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096315838.927, "dur": 30.384, + "args": { + "External id": 228603,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096315878.362, "dur": 40.579, + "args": { + "External id": 228604,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096315924.642, "dur": 34.359, + "args": { + "External id": 228605,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096315997.279, "dur": 28.982, + "args": { + "External id": 228606,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096316033.079, "dur": 37.385, + "args": { + "External id": 228607,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096316089.193, "dur": 17.531, + "args": { + "External id": 228608,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5327096316253.942, "dur": 79.641, + "args": { + "External id": 228609,"Record function id": 0, "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096316406.459, "dur": 45.849, + "args": { + "External id": 228610,"Record function id": 0, "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5327096316462.356, "dur": 18301.450, + "args": { + "External id": 228611,"Record function id": 0, "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5327096316471.349, "dur": 855.775, + "args": { + "External id": 228612,"Record function id": 0, "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096316550.941, "dur": 7.932, + "args": { + "External id": 228613,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096316573.086, "dur": 34.043, + "args": { + "External id": 228614,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316578.675, "dur": 2.095, + "args": { + "External id": 228615,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316585.619, "dur": 0.329, + "args": { + "External id": 228616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316587.398, "dur": 0.322, + "args": { + "External id": 228617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316589.130, "dur": 0.375, + "args": { + "External id": 228618,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316592.530, "dur": 0.425, + "args": { + "External id": 228619,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316594.440, "dur": 0.348, + "args": { + "External id": 228620,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316596.164, "dur": 2.116, + "args": { + "External id": 228621,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316599.698, "dur": 0.328, + "args": { + "External id": 228622,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316601.281, "dur": 0.160, + "args": { + "External id": 228623,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096316617.894, "dur": 95.871, + "args": { + "External id": 228624,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096316754.249, "dur": 120.699, + "args": { + "External id": 228625,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096316766.335, "dur": 5.175, + "args": { + "External id": 228626,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096316776.607, "dur": 11.802, + "args": { + "External id": 228627,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096316781.058, "dur": 6.903, + "args": { + "External id": 228628,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316785.248, "dur": 0.746, + "args": { + "External id": 228629,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096316796.020, "dur": 33.760, + "args": { + "External id": 228630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316798.611, "dur": 2.176, + "args": { + "External id": 228631,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316802.162, "dur": 0.357, + "args": { + "External id": 228632,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316809.160, "dur": 0.513, + "args": { + "External id": 228633,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316812.599, "dur": 1.408, + "args": { + "External id": 228634,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316815.404, "dur": 0.211, + "args": { + "External id": 228635,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316817.009, "dur": 2.012, + "args": { + "External id": 228636,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316820.395, "dur": 0.146, + "args": { + "External id": 228637,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316821.658, "dur": 0.183, + "args": { + "External id": 228638,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096316825.704, "dur": 0.144, + "args": { + "External id": 228639,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096316843.677, "dur": 23.476, + "args": { + "External id": 228640,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096316932.528, "dur": 304.805, + "args": { + "External id": 228641,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096316964.992, "dur": 267.298, + "args": { + "External id": 228642,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6126, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096316974.851, "dur": 252.064, + "args": { + "External id": 228643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096317260.118, "dur": 2.542, + "args": { + "External id": 228644,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6128, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5327096317347.687, "dur": 17164.388, + "args": { + "External id": 228645,"Record function id": 0, "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317448.993, "dur": 6.419, + "args": { + "External id": 228646,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317458.861, "dur": 1.044, + "args": { + "External id": 228647,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317461.546, "dur": 2.196, + "args": { + "External id": 228648,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317465.473, "dur": 0.754, + "args": { + "External id": 228649,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317467.722, "dur": 0.945, + "args": { + "External id": 228650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317469.759, "dur": 0.816, + "args": { + "External id": 228651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317479.448, "dur": 0.796, + "args": { + "External id": 228652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317481.740, "dur": 1.635, + "args": { + "External id": 228653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317484.810, "dur": 0.819, + "args": { + "External id": 228654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096317486.984, "dur": 0.562, + "args": { + "External id": 228655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096317509.334, "dur": 16957.559, + "args": { + "External id": 228656,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096317524.479, "dur": 16934.435, + "args": { + "External id": 228657,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096317542.096, "dur": 13.650, + "args": { + "External id": 228658,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096317559.425, "dur": 16865.181, + "args": { + "External id": 228659,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096317561.842, "dur": 16862.060, + "args": { + "External id": 228660,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096317567.614, "dur": 5.332, + "args": { + "External id": 228661,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096317574.543, "dur": 16846.263, + "args": { + "External id": 228662,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096334696.760, "dur": 39.248, + "args": { + "External id": 228663,"Sequence number": 959135, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6147 + } + }, + { + "ph": "s", "id": 41, "pid": 2070552, "tid": 2070552, "ts": 5327096334696.760, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096334721.365, "dur": 9.968, + "args": { + "External id": 228664,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096334725.383, "dur": 5.546, + "args": { + "External id": 228665,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096334803.349, "dur": 83.957, + "args": { + "External id": 228666,"Record function id": 0, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096334888.742, "dur": 1068.809, + "args": { + "External id": 228667,"Record function id": 0, "Ev Idx": 6151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096334931.776, "dur": 1012.147, + "args": { + "External id": 228668,"Sequence number": 959136, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6152 + } + }, + { + "ph": "s", "id": 40, "pid": 2070552, "tid": 2070552, "ts": 5327096334931.776, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096335017.163, "dur": 45.393, + "args": { + "External id": 228669,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096335077.226, "dur": 105.855, + "args": { + "External id": 228670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096335191.850, "dur": 38.627, + "args": { + "External id": 228671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096335238.362, "dur": 30.824, + "args": { + "External id": 228672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096335293.808, "dur": 24.516, + "args": { + "External id": 228673,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096335335.569, "dur": 13.905, + "args": { + "External id": 228674,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096335368.682, "dur": 126.295, + "args": { + "External id": 228675,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096335417.535, "dur": 11.472, + "args": { + "External id": 228676,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096335423.002, "dur": 5.335, + "args": { + "External id": 228677,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096335431.952, "dur": 5.582, + "args": { + "External id": 228678,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096335439.014, "dur": 0.995, + "args": { + "External id": 228679,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096335442.609, "dur": 3.239, + "args": { + "External id": 228680,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096335504.967, "dur": 42.968, + "args": { + "External id": 228681,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096335576.051, "dur": 26.474, + "args": { + "External id": 228682,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096335610.969, "dur": 82.935, + "args": { + "External id": 228683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096335707.306, "dur": 36.558, + "args": { + "External id": 228684,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096335768.235, "dur": 27.754, + "args": { + "External id": 228685,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096335801.310, "dur": 33.447, + "args": { + "External id": 228686,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096335854.821, "dur": 16.772, + "args": { + "External id": 228687,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5327096336040.827, "dur": 77.532, + "args": { + "External id": 228688,"Record function id": 0, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096336191.415, "dur": 43.950, + "args": { + "External id": 228689,"Record function id": 0, "Ev Idx": 6173 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5327096336244.860, "dur": 18258.315, + "args": { + "External id": 228690,"Record function id": 0, "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5327096336253.554, "dur": 845.051, + "args": { + "External id": 228691,"Record function id": 0, "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096336332.568, "dur": 7.917, + "args": { + "External id": 228692,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096336354.870, "dur": 35.669, + "args": { + "External id": 228693,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336360.221, "dur": 2.301, + "args": { + "External id": 228694,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336366.887, "dur": 0.254, + "args": { + "External id": 228695,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336369.021, "dur": 0.287, + "args": { + "External id": 228696,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336370.675, "dur": 0.370, + "args": { + "External id": 228697,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336373.935, "dur": 0.324, + "args": { + "External id": 228698,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336375.487, "dur": 0.360, + "args": { + "External id": 228699,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336377.496, "dur": 3.326, + "args": { + "External id": 228700,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336382.191, "dur": 0.353, + "args": { + "External id": 228701,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336384.195, "dur": 0.514, + "args": { + "External id": 228702,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096336401.974, "dur": 42.893, + "args": { + "External id": 228703,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096336478.097, "dur": 105.705, + "args": { + "External id": 228704,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096336488.866, "dur": 4.137, + "args": { + "External id": 228705,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096336497.970, "dur": 10.122, + "args": { + "External id": 228706,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096336502.193, "dur": 5.473, + "args": { + "External id": 228707,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336505.918, "dur": 0.434, + "args": { + "External id": 228708,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096336514.899, "dur": 28.223, + "args": { + "External id": 228709,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336517.007, "dur": 2.558, + "args": { + "External id": 228710,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336520.925, "dur": 0.224, + "args": { + "External id": 228711,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336522.642, "dur": 0.326, + "args": { + "External id": 228712,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336526.235, "dur": 1.166, + "args": { + "External id": 228713,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336528.851, "dur": 0.215, + "args": { + "External id": 228714,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336530.718, "dur": 0.205, + "args": { + "External id": 228715,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336533.949, "dur": 0.419, + "args": { + "External id": 228716,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336535.539, "dur": 0.442, + "args": { + "External id": 228717,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096336537.284, "dur": 2.059, + "args": { + "External id": 228718,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096336553.514, "dur": 22.904, + "args": { + "External id": 228719,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096336680.363, "dur": 321.600, + "args": { + "External id": 228720,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096336712.811, "dur": 283.985, + "args": { + "External id": 228721,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6205, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096336723.931, "dur": 249.533, + "args": { + "External id": 228722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096337026.216, "dur": 2.721, + "args": { + "External id": 228723,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6207, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5327096337120.153, "dur": 17175.610, + "args": { + "External id": 228724,"Record function id": 0, "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337223.173, "dur": 6.711, + "args": { + "External id": 228725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337233.259, "dur": 1.076, + "args": { + "External id": 228726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337235.819, "dur": 2.168, + "args": { + "External id": 228727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337239.478, "dur": 0.646, + "args": { + "External id": 228728,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337241.445, "dur": 0.814, + "args": { + "External id": 228729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337243.465, "dur": 0.751, + "args": { + "External id": 228730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337247.860, "dur": 0.680, + "args": { + "External id": 228731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337250.105, "dur": 1.904, + "args": { + "External id": 228732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337253.484, "dur": 0.673, + "args": { + "External id": 228733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096337255.785, "dur": 0.702, + "args": { + "External id": 228734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096337288.024, "dur": 16964.885, + "args": { + "External id": 228735,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096337304.078, "dur": 16941.254, + "args": { + "External id": 228736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096337321.466, "dur": 14.439, + "args": { + "External id": 228737,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096337339.398, "dur": 16871.767, + "args": { + "External id": 228738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096337341.988, "dur": 16868.396, + "args": { + "External id": 228739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096337347.942, "dur": 5.125, + "args": { + "External id": 228740,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096337354.705, "dur": 16852.738, + "args": { + "External id": 228741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096354441.130, "dur": 37.043, + "args": { + "External id": 228742,"Sequence number": 959137, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6226 + } + }, + { + "ph": "s", "id": 39, "pid": 2070552, "tid": 2070552, "ts": 5327096354441.130, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096354464.046, "dur": 8.975, + "args": { + "External id": 228743,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096354467.822, "dur": 4.978, + "args": { + "External id": 228744,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096354542.037, "dur": 114.002, + "args": { + "External id": 228745,"Record function id": 0, "Ev Idx": 6229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096354659.504, "dur": 1083.449, + "args": { + "External id": 228746,"Record function id": 0, "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096354703.694, "dur": 1024.412, + "args": { + "External id": 228747,"Sequence number": 959138, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6231 + } + }, + { + "ph": "s", "id": 38, "pid": 2070552, "tid": 2070552, "ts": 5327096354703.694, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096354774.476, "dur": 45.478, + "args": { + "External id": 228748,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096354832.847, "dur": 106.978, + "args": { + "External id": 228749,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096354948.885, "dur": 55.384, + "args": { + "External id": 228750,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096355015.674, "dur": 33.758, + "args": { + "External id": 228751,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096355075.277, "dur": 25.393, + "args": { + "External id": 228752,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096355117.237, "dur": 13.772, + "args": { + "External id": 228753,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096355150.999, "dur": 127.139, + "args": { + "External id": 228754,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096355201.829, "dur": 11.451, + "args": { + "External id": 228755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096355207.457, "dur": 5.107, + "args": { + "External id": 228756,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096355215.897, "dur": 5.337, + "args": { + "External id": 228757,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096355222.472, "dur": 1.090, + "args": { + "External id": 228758,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096355225.932, "dur": 3.637, + "args": { + "External id": 228759,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096355288.014, "dur": 45.186, + "args": { + "External id": 228760,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096355363.047, "dur": 26.068, + "args": { + "External id": 228761,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096355397.440, "dur": 39.878, + "args": { + "External id": 228762,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096355445.146, "dur": 35.312, + "args": { + "External id": 228763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096355502.882, "dur": 24.715, + "args": { + "External id": 228764,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096355533.477, "dur": 33.735, + "args": { + "External id": 228765,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096355586.809, "dur": 17.650, + "args": { + "External id": 228766,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5327096355810.573, "dur": 77.347, + "args": { + "External id": 228767,"Record function id": 0, "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096355959.060, "dur": 61.896, + "args": { + "External id": 228768,"Record function id": 0, "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5327096356031.942, "dur": 18240.865, + "args": { + "External id": 228769,"Record function id": 0, "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5327096356041.345, "dur": 859.650, + "args": { + "External id": 228770,"Record function id": 0, "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096356122.840, "dur": 9.806, + "args": { + "External id": 228771,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096356147.934, "dur": 35.534, + "args": { + "External id": 228772,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356153.465, "dur": 2.221, + "args": { + "External id": 228773,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356160.388, "dur": 0.304, + "args": { + "External id": 228774,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356162.051, "dur": 0.338, + "args": { + "External id": 228775,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356163.847, "dur": 0.350, + "args": { + "External id": 228776,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356167.212, "dur": 0.494, + "args": { + "External id": 228777,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356168.854, "dur": 0.252, + "args": { + "External id": 228778,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356170.501, "dur": 3.071, + "args": { + "External id": 228779,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356175.098, "dur": 0.322, + "args": { + "External id": 228780,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356177.371, "dur": 0.297, + "args": { + "External id": 228781,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096356194.535, "dur": 44.049, + "args": { + "External id": 228782,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096356270.725, "dur": 109.535, + "args": { + "External id": 228783,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096356281.526, "dur": 4.070, + "args": { + "External id": 228784,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096356290.530, "dur": 10.048, + "args": { + "External id": 228785,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096356294.979, "dur": 5.205, + "args": { + "External id": 228786,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356298.440, "dur": 0.510, + "args": { + "External id": 228787,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096356307.700, "dur": 27.696, + "args": { + "External id": 228788,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356309.813, "dur": 2.317, + "args": { + "External id": 228789,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356313.587, "dur": 0.362, + "args": { + "External id": 228790,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356315.509, "dur": 0.193, + "args": { + "External id": 228791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356319.192, "dur": 1.374, + "args": { + "External id": 228792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356321.886, "dur": 0.160, + "args": { + "External id": 228793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356323.441, "dur": 0.300, + "args": { + "External id": 228794,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356326.744, "dur": 0.173, + "args": { + "External id": 228795,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356328.342, "dur": 0.262, + "args": { + "External id": 228796,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096356329.844, "dur": 1.777, + "args": { + "External id": 228797,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096356349.055, "dur": 23.306, + "args": { + "External id": 228798,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096356431.925, "dur": 370.606, + "args": { + "External id": 228799,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096356463.155, "dur": 333.545, + "args": { + "External id": 228800,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6284, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096356473.201, "dur": 317.625, + "args": { + "External id": 228801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096356828.439, "dur": 2.207, + "args": { + "External id": 228802,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6286, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5327096356921.723, "dur": 17151.039, + "args": { + "External id": 228803,"Record function id": 0, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357040.976, "dur": 7.009, + "args": { + "External id": 228804,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357051.946, "dur": 0.844, + "args": { + "External id": 228805,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357068.634, "dur": 1.954, + "args": { + "External id": 228806,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357074.285, "dur": 0.713, + "args": { + "External id": 228807,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357076.281, "dur": 0.740, + "args": { + "External id": 228808,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357078.205, "dur": 0.502, + "args": { + "External id": 228809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357081.174, "dur": 0.605, + "args": { + "External id": 228810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357085.116, "dur": 1.815, + "args": { + "External id": 228811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357088.547, "dur": 0.584, + "args": { + "External id": 228812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096357090.517, "dur": 0.700, + "args": { + "External id": 228813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096357112.166, "dur": 16916.841, + "args": { + "External id": 228814,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096357129.478, "dur": 16891.486, + "args": { + "External id": 228815,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096357146.812, "dur": 15.808, + "args": { + "External id": 228816,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096357168.087, "dur": 16805.233, + "args": { + "External id": 228817,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096357170.576, "dur": 16802.086, + "args": { + "External id": 228818,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096357177.084, "dur": 5.218, + "args": { + "External id": 228819,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096357183.818, "dur": 16785.914, + "args": { + "External id": 228820,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096374213.140, "dur": 35.193, + "args": { + "External id": 228821,"Sequence number": 959139, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6305 + } + }, + { + "ph": "s", "id": 37, "pid": 2070552, "tid": 2070552, "ts": 5327096374213.140, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096374234.661, "dur": 9.034, + "args": { + "External id": 228822,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096374238.654, "dur": 4.813, + "args": { + "External id": 228823,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096374311.818, "dur": 84.813, + "args": { + "External id": 228824,"Record function id": 0, "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096374398.173, "dur": 1058.831, + "args": { + "External id": 228825,"Record function id": 0, "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096374436.053, "dur": 1007.142, + "args": { + "External id": 228826,"Sequence number": 959140, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6310 + } + }, + { + "ph": "s", "id": 36, "pid": 2070552, "tid": 2070552, "ts": 5327096374436.053, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096374505.297, "dur": 41.614, + "args": { + "External id": 228827,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096374559.404, "dur": 134.173, + "args": { + "External id": 228828,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096374707.169, "dur": 42.337, + "args": { + "External id": 228829,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096374757.987, "dur": 30.606, + "args": { + "External id": 228830,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096374814.694, "dur": 26.353, + "args": { + "External id": 228831,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096374859.880, "dur": 14.956, + "args": { + "External id": 228832,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096374894.476, "dur": 141.020, + "args": { + "External id": 228833,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096374941.848, "dur": 11.016, + "args": { + "External id": 228834,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096374946.941, "dur": 5.211, + "args": { + "External id": 228835,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096374955.509, "dur": 5.279, + "args": { + "External id": 228836,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096374962.081, "dur": 1.057, + "args": { + "External id": 228837,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096374965.498, "dur": 3.428, + "args": { + "External id": 228838,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096375047.548, "dur": 48.627, + "args": { + "External id": 228839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096375124.864, "dur": 28.088, + "args": { + "External id": 228840,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096375161.755, "dur": 40.822, + "args": { + "External id": 228841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096375210.462, "dur": 34.355, + "args": { + "External id": 228842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096375268.502, "dur": 25.010, + "args": { + "External id": 228843,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096375298.743, "dur": 33.305, + "args": { + "External id": 228844,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096375353.418, "dur": 17.514, + "args": { + "External id": 228845,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5327096375522.427, "dur": 77.162, + "args": { + "External id": 228846,"Record function id": 0, "Ev Idx": 6330 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096375719.175, "dur": 48.011, + "args": { + "External id": 228847,"Record function id": 0, "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5327096375777.171, "dur": 18196.721, + "args": { + "External id": 228848,"Record function id": 0, "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5327096375785.412, "dur": 816.344, + "args": { + "External id": 228849,"Record function id": 0, "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096375868.949, "dur": 9.290, + "args": { + "External id": 228850,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096375892.618, "dur": 34.706, + "args": { + "External id": 228851,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375898.432, "dur": 2.182, + "args": { + "External id": 228852,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375904.711, "dur": 0.342, + "args": { + "External id": 228853,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375906.341, "dur": 0.327, + "args": { + "External id": 228854,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375908.421, "dur": 0.445, + "args": { + "External id": 228855,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375912.216, "dur": 0.222, + "args": { + "External id": 228856,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375913.717, "dur": 0.372, + "args": { + "External id": 228857,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375915.837, "dur": 2.373, + "args": { + "External id": 228858,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375919.674, "dur": 0.355, + "args": { + "External id": 228859,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096375921.543, "dur": 0.307, + "args": { + "External id": 228860,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096375938.857, "dur": 60.124, + "args": { + "External id": 228861,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096376035.329, "dur": 141.868, + "args": { + "External id": 228862,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096376047.342, "dur": 4.916, + "args": { + "External id": 228863,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096376057.880, "dur": 37.018, + "args": { + "External id": 228864,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096376087.447, "dur": 6.933, + "args": { + "External id": 228865,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376091.743, "dur": 0.755, + "args": { + "External id": 228866,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096376102.629, "dur": 28.762, + "args": { + "External id": 228867,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376105.049, "dur": 2.695, + "args": { + "External id": 228868,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376109.292, "dur": 0.200, + "args": { + "External id": 228869,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376110.946, "dur": 0.199, + "args": { + "External id": 228870,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376114.498, "dur": 1.272, + "args": { + "External id": 228871,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376117.599, "dur": 0.147, + "args": { + "External id": 228872,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376119.375, "dur": 0.156, + "args": { + "External id": 228873,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376122.884, "dur": 0.158, + "args": { + "External id": 228874,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376124.446, "dur": 0.153, + "args": { + "External id": 228875,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376125.972, "dur": 1.900, + "args": { + "External id": 228876,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096376142.987, "dur": 26.103, + "args": { + "External id": 228877,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096376230.538, "dur": 288.263, + "args": { + "External id": 228878,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096376262.862, "dur": 251.581, + "args": { + "External id": 228879,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6363, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096376276.212, "dur": 233.224, + "args": { + "External id": 228880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096376539.396, "dur": 2.371, + "args": { + "External id": 228881,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6365, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5327096376663.371, "dur": 17110.521, + "args": { + "External id": 228882,"Record function id": 0, "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376766.280, "dur": 6.388, + "args": { + "External id": 228883,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376776.214, "dur": 0.901, + "args": { + "External id": 228884,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376778.870, "dur": 1.741, + "args": { + "External id": 228885,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376782.559, "dur": 0.667, + "args": { + "External id": 228886,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376784.532, "dur": 0.711, + "args": { + "External id": 228887,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376786.362, "dur": 0.729, + "args": { + "External id": 228888,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376790.358, "dur": 0.825, + "args": { + "External id": 228889,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376793.003, "dur": 1.587, + "args": { + "External id": 228890,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376796.161, "dur": 0.602, + "args": { + "External id": 228891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096376798.553, "dur": 0.440, + "args": { + "External id": 228892,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096376820.375, "dur": 16909.642, + "args": { + "External id": 228893,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096376835.996, "dur": 16885.841, + "args": { + "External id": 228894,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096376855.374, "dur": 14.514, + "args": { + "External id": 228895,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096376873.812, "dur": 16813.794, + "args": { + "External id": 228896,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096376876.444, "dur": 16810.309, + "args": { + "External id": 228897,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096376881.872, "dur": 5.330, + "args": { + "External id": 228898,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096376888.778, "dur": 16794.868, + "args": { + "External id": 228899,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096393912.612, "dur": 36.332, + "args": { + "External id": 228900,"Sequence number": 959141, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6384 + } + }, + { + "ph": "s", "id": 35, "pid": 2070552, "tid": 2070552, "ts": 5327096393912.612, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096393934.689, "dur": 9.332, + "args": { + "External id": 228901,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096393938.744, "dur": 4.999, + "args": { + "External id": 228902,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096394025.632, "dur": 85.840, + "args": { + "External id": 228903,"Record function id": 0, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096394113.467, "dur": 1084.365, + "args": { + "External id": 228904,"Record function id": 0, "Ev Idx": 6388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096394155.315, "dur": 1029.136, + "args": { + "External id": 228905,"Sequence number": 959142, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6389 + } + }, + { + "ph": "s", "id": 34, "pid": 2070552, "tid": 2070552, "ts": 5327096394155.315, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096394223.852, "dur": 43.821, + "args": { + "External id": 228906,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096394279.983, "dur": 106.946, + "args": { + "External id": 228907,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096394395.949, "dur": 37.159, + "args": { + "External id": 228908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096394441.581, "dur": 30.447, + "args": { + "External id": 228909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096394496.893, "dur": 23.771, + "args": { + "External id": 228910,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096394537.445, "dur": 13.134, + "args": { + "External id": 228911,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096394569.895, "dur": 175.071, + "args": { + "External id": 228912,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096394615.760, "dur": 58.274, + "args": { + "External id": 228913,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096394666.582, "dur": 6.489, + "args": { + "External id": 228914,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096394677.118, "dur": 5.008, + "args": { + "External id": 228915,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096394683.780, "dur": 0.998, + "args": { + "External id": 228916,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096394687.247, "dur": 3.549, + "args": { + "External id": 228917,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096394756.573, "dur": 52.004, + "args": { + "External id": 228918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096394838.820, "dur": 28.289, + "args": { + "External id": 228919,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096394875.189, "dur": 40.528, + "args": { + "External id": 228920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096394923.685, "dur": 34.349, + "args": { + "External id": 228921,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096394998.177, "dur": 28.525, + "args": { + "External id": 228922,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096395034.399, "dur": 37.886, + "args": { + "External id": 228923,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096395093.182, "dur": 17.852, + "args": { + "External id": 228924,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5327096395260.597, "dur": 75.263, + "args": { + "External id": 228925,"Record function id": 0, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096395408.424, "dur": 47.867, + "args": { + "External id": 228926,"Record function id": 0, "Ev Idx": 6410 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5327096395465.421, "dur": 18364.690, + "args": { + "External id": 228927,"Record function id": 0, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5327096395472.954, "dur": 840.228, + "args": { + "External id": 228928,"Record function id": 0, "Ev Idx": 6412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096395553.768, "dur": 7.993, + "args": { + "External id": 228929,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096395576.267, "dur": 36.197, + "args": { + "External id": 228930,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395581.843, "dur": 2.197, + "args": { + "External id": 228931,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395588.746, "dur": 0.247, + "args": { + "External id": 228932,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395590.859, "dur": 0.390, + "args": { + "External id": 228933,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395593.245, "dur": 0.371, + "args": { + "External id": 228934,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395596.675, "dur": 0.486, + "args": { + "External id": 228935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395598.567, "dur": 0.331, + "args": { + "External id": 228936,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395600.230, "dur": 3.248, + "args": { + "External id": 228937,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395605.170, "dur": 0.522, + "args": { + "External id": 228938,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395607.028, "dur": 0.389, + "args": { + "External id": 228939,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096395664.008, "dur": 42.026, + "args": { + "External id": 228940,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096395742.990, "dur": 117.848, + "args": { + "External id": 228941,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096395754.476, "dur": 5.157, + "args": { + "External id": 228942,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096395764.756, "dur": 11.225, + "args": { + "External id": 228943,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096395769.467, "dur": 6.083, + "args": { + "External id": 228944,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395772.893, "dur": 0.854, + "args": { + "External id": 228945,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096395783.708, "dur": 30.096, + "args": { + "External id": 228946,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395786.627, "dur": 1.687, + "args": { + "External id": 228947,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395789.962, "dur": 0.417, + "args": { + "External id": 228948,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395791.695, "dur": 0.185, + "args": { + "External id": 228949,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395795.349, "dur": 1.345, + "args": { + "External id": 228950,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395798.073, "dur": 0.179, + "args": { + "External id": 228951,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395800.142, "dur": 0.370, + "args": { + "External id": 228952,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395803.531, "dur": 0.503, + "args": { + "External id": 228953,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395805.586, "dur": 0.569, + "args": { + "External id": 228954,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096395807.688, "dur": 2.000, + "args": { + "External id": 228955,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096395829.429, "dur": 23.225, + "args": { + "External id": 228956,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096395914.853, "dur": 308.038, + "args": { + "External id": 228957,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096395945.646, "dur": 272.240, + "args": { + "External id": 228958,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6442, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096395955.634, "dur": 254.772, + "args": { + "External id": 228959,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096396246.488, "dur": 2.278, + "args": { + "External id": 228960,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6444, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5327096396333.748, "dur": 17238.817, + "args": { + "External id": 228961,"Record function id": 0, "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396430.022, "dur": 6.307, + "args": { + "External id": 228962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396439.535, "dur": 0.937, + "args": { + "External id": 228963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396442.363, "dur": 1.852, + "args": { + "External id": 228964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396446.054, "dur": 0.773, + "args": { + "External id": 228965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396448.344, "dur": 0.842, + "args": { + "External id": 228966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396450.440, "dur": 0.914, + "args": { + "External id": 228967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396454.884, "dur": 0.930, + "args": { + "External id": 228968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396457.377, "dur": 1.866, + "args": { + "External id": 228969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396461.091, "dur": 0.776, + "args": { + "External id": 228970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096396463.541, "dur": 0.664, + "args": { + "External id": 228971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096396484.641, "dur": 17042.218, + "args": { + "External id": 228972,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096396500.457, "dur": 17018.757, + "args": { + "External id": 228973,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096396524.428, "dur": 15.269, + "args": { + "External id": 228974,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096396543.461, "dur": 16942.732, + "args": { + "External id": 228975,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096396545.792, "dur": 16939.680, + "args": { + "External id": 228976,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096396553.403, "dur": 6.690, + "args": { + "External id": 228977,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096396561.698, "dur": 16920.872, + "args": { + "External id": 228978,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096413762.032, "dur": 40.792, + "args": { + "External id": 228979,"Sequence number": 959143, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6463 + } + }, + { + "ph": "s", "id": 33, "pid": 2070552, "tid": 2070552, "ts": 5327096413762.032, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096413788.208, "dur": 9.651, + "args": { + "External id": 228980,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096413791.984, "dur": 5.481, + "args": { + "External id": 228981,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096413868.551, "dur": 82.386, + "args": { + "External id": 228982,"Record function id": 0, "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096413952.707, "dur": 1075.460, + "args": { + "External id": 228983,"Record function id": 0, "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096414007.678, "dur": 1006.670, + "args": { + "External id": 228984,"Sequence number": 959144, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6468 + } + }, + { + "ph": "s", "id": 32, "pid": 2070552, "tid": 2070552, "ts": 5327096414007.678, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096414078.753, "dur": 45.587, + "args": { + "External id": 228985,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096414137.073, "dur": 105.355, + "args": { + "External id": 228986,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096414252.159, "dur": 37.970, + "args": { + "External id": 228987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096414298.519, "dur": 30.264, + "args": { + "External id": 228988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096414353.606, "dur": 24.314, + "args": { + "External id": 228989,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096414396.669, "dur": 14.099, + "args": { + "External id": 228990,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096414429.277, "dur": 122.884, + "args": { + "External id": 228991,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096414476.524, "dur": 11.506, + "args": { + "External id": 228992,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096414481.898, "dur": 5.411, + "args": { + "External id": 228993,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096414490.593, "dur": 5.528, + "args": { + "External id": 228994,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096414497.634, "dur": 0.926, + "args": { + "External id": 228995,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096414501.100, "dur": 3.460, + "args": { + "External id": 228996,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096414561.857, "dur": 43.335, + "args": { + "External id": 228997,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096414673.870, "dur": 31.468, + "args": { + "External id": 228998,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096414716.046, "dur": 45.298, + "args": { + "External id": 228999,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096414770.418, "dur": 34.907, + "args": { + "External id": 229000,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096414827.722, "dur": 23.024, + "args": { + "External id": 229001,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096414856.339, "dur": 32.996, + "args": { + "External id": 229002,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096414909.684, "dur": 16.899, + "args": { + "External id": 229003,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5327096415093.864, "dur": 76.128, + "args": { + "External id": 229004,"Record function id": 0, "Ev Idx": 6488 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096415242.169, "dur": 45.215, + "args": { + "External id": 229005,"Record function id": 0, "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5327096415296.688, "dur": 18542.750, + "args": { + "External id": 229006,"Record function id": 0, "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5327096415306.039, "dur": 918.784, + "args": { + "External id": 229007,"Record function id": 0, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096415384.746, "dur": 8.991, + "args": { + "External id": 229008,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096415409.833, "dur": 35.585, + "args": { + "External id": 229009,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415415.023, "dur": 2.494, + "args": { + "External id": 229010,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415421.837, "dur": 0.214, + "args": { + "External id": 229011,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415423.873, "dur": 0.252, + "args": { + "External id": 229012,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415425.412, "dur": 0.532, + "args": { + "External id": 229013,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415428.754, "dur": 0.452, + "args": { + "External id": 229014,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415430.738, "dur": 0.417, + "args": { + "External id": 229015,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415432.570, "dur": 3.344, + "args": { + "External id": 229016,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415437.527, "dur": 0.206, + "args": { + "External id": 229017,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415439.274, "dur": 0.386, + "args": { + "External id": 229018,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096415456.206, "dur": 40.034, + "args": { + "External id": 229019,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096415528.484, "dur": 157.226, + "args": { + "External id": 229020,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096415538.865, "dur": 3.748, + "args": { + "External id": 229021,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096415547.767, "dur": 11.104, + "args": { + "External id": 229022,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096415552.256, "dur": 6.184, + "args": { + "External id": 229023,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415556.629, "dur": 0.608, + "args": { + "External id": 229024,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096415566.030, "dur": 32.338, + "args": { + "External id": 229025,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415572.215, "dur": 2.048, + "args": { + "External id": 229026,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415575.848, "dur": 0.209, + "args": { + "External id": 229027,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415577.790, "dur": 0.217, + "args": { + "External id": 229028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415581.122, "dur": 1.133, + "args": { + "External id": 229029,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415583.744, "dur": 0.158, + "args": { + "External id": 229030,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415585.346, "dur": 0.160, + "args": { + "External id": 229031,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415588.511, "dur": 0.158, + "args": { + "External id": 229032,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415589.832, "dur": 0.445, + "args": { + "External id": 229033,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096415591.549, "dur": 2.619, + "args": { + "External id": 229034,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096415610.484, "dur": 64.940, + "args": { + "External id": 229035,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096415743.960, "dur": 384.042, + "args": { + "External id": 229036,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096415775.402, "dur": 347.211, + "args": { + "External id": 229037,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6521, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096415785.605, "dur": 330.886, + "args": { + "External id": 229038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096416152.164, "dur": 2.591, + "args": { + "External id": 229039,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6523, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5327096416246.667, "dur": 17356.783, + "args": { + "External id": 229040,"Record function id": 0, "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416345.998, "dur": 6.671, + "args": { + "External id": 229041,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416356.301, "dur": 1.235, + "args": { + "External id": 229042,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416359.216, "dur": 1.764, + "args": { + "External id": 229043,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416362.563, "dur": 0.932, + "args": { + "External id": 229044,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416364.721, "dur": 0.935, + "args": { + "External id": 229045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416366.836, "dur": 0.811, + "args": { + "External id": 229046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416371.204, "dur": 0.988, + "args": { + "External id": 229047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416373.696, "dur": 2.211, + "args": { + "External id": 229048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416377.569, "dur": 0.670, + "args": { + "External id": 229049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096416380.032, "dur": 0.911, + "args": { + "External id": 229050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096416402.035, "dur": 17156.365, + "args": { + "External id": 229051,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096416417.691, "dur": 17132.942, + "args": { + "External id": 229052,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096416439.185, "dur": 15.487, + "args": { + "External id": 229053,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096416458.120, "dur": 17058.071, + "args": { + "External id": 229054,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096416460.580, "dur": 17054.866, + "args": { + "External id": 229055,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096416466.076, "dur": 5.342, + "args": { + "External id": 229056,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096416472.869, "dur": 17039.283, + "args": { + "External id": 229057,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096433775.349, "dur": 36.442, + "args": { + "External id": 229058,"Sequence number": 959145, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6542 + } + }, + { + "ph": "s", "id": 31, "pid": 2070552, "tid": 2070552, "ts": 5327096433775.349, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096433797.390, "dur": 9.309, + "args": { + "External id": 229059,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096433801.238, "dur": 5.048, + "args": { + "External id": 229060,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096433879.701, "dur": 83.129, + "args": { + "External id": 229061,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096433964.507, "dur": 1093.071, + "args": { + "External id": 229062,"Record function id": 0, "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096434022.907, "dur": 1020.943, + "args": { + "External id": 229063,"Sequence number": 959146, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6547 + } + }, + { + "ph": "s", "id": 30, "pid": 2070552, "tid": 2070552, "ts": 5327096434022.907, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096434094.201, "dur": 44.648, + "args": { + "External id": 229064,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096434152.736, "dur": 107.128, + "args": { + "External id": 229065,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096434269.017, "dur": 37.667, + "args": { + "External id": 229066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096434314.519, "dur": 31.577, + "args": { + "External id": 229067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096434370.804, "dur": 25.098, + "args": { + "External id": 229068,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096434421.137, "dur": 13.587, + "args": { + "External id": 229069,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096434453.695, "dur": 126.086, + "args": { + "External id": 229070,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096434505.563, "dur": 11.687, + "args": { + "External id": 229071,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096434510.971, "dur": 5.583, + "args": { + "External id": 229072,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096434519.993, "dur": 4.046, + "args": { + "External id": 229073,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096434525.118, "dur": 1.159, + "args": { + "External id": 229074,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096434528.518, "dur": 3.497, + "args": { + "External id": 229075,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096434589.565, "dur": 83.977, + "args": { + "External id": 229076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096434709.157, "dur": 28.316, + "args": { + "External id": 229077,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096434745.988, "dur": 46.135, + "args": { + "External id": 229078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096434800.608, "dur": 35.038, + "args": { + "External id": 229079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096434858.602, "dur": 25.157, + "args": { + "External id": 229080,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096434889.299, "dur": 33.628, + "args": { + "External id": 229081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096434942.153, "dur": 18.043, + "args": { + "External id": 229082,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5327096435122.333, "dur": 77.091, + "args": { + "External id": 229083,"Record function id": 0, "Ev Idx": 6567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096435271.016, "dur": 45.432, + "args": { + "External id": 229084,"Record function id": 0, "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5327096435325.962, "dur": 18233.287, + "args": { + "External id": 229085,"Record function id": 0, "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5327096435333.838, "dur": 835.028, + "args": { + "External id": 229086,"Record function id": 0, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096435415.142, "dur": 8.853, + "args": { + "External id": 229087,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096435438.221, "dur": 36.825, + "args": { + "External id": 229088,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435443.739, "dur": 2.301, + "args": { + "External id": 229089,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435450.513, "dur": 0.261, + "args": { + "External id": 229090,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435452.383, "dur": 0.210, + "args": { + "External id": 229091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435454.311, "dur": 0.509, + "args": { + "External id": 229092,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435458.132, "dur": 0.443, + "args": { + "External id": 229093,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435460.120, "dur": 0.338, + "args": { + "External id": 229094,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435461.899, "dur": 3.333, + "args": { + "External id": 229095,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435466.895, "dur": 0.495, + "args": { + "External id": 229096,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435468.756, "dur": 0.292, + "args": { + "External id": 229097,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096435485.543, "dur": 44.160, + "args": { + "External id": 229098,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096435560.625, "dur": 157.048, + "args": { + "External id": 229099,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096435571.247, "dur": 3.483, + "args": { + "External id": 229100,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096435579.683, "dur": 10.708, + "args": { + "External id": 229101,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096435584.543, "dur": 5.426, + "args": { + "External id": 229102,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435588.209, "dur": 0.530, + "args": { + "External id": 229103,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096435597.193, "dur": 67.848, + "args": { + "External id": 229104,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435599.392, "dur": 2.003, + "args": { + "External id": 229105,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435602.866, "dur": 0.506, + "args": { + "External id": 229106,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435604.743, "dur": 0.224, + "args": { + "External id": 229107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435608.108, "dur": 1.238, + "args": { + "External id": 229108,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435610.631, "dur": 0.546, + "args": { + "External id": 229109,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435612.351, "dur": 0.419, + "args": { + "External id": 229110,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435615.529, "dur": 0.337, + "args": { + "External id": 229111,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435617.181, "dur": 0.302, + "args": { + "External id": 229112,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096435657.521, "dur": 2.991, + "args": { + "External id": 229113,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096435680.911, "dur": 28.606, + "args": { + "External id": 229114,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096435771.980, "dur": 303.639, + "args": { + "External id": 229115,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096435802.092, "dur": 268.631, + "args": { + "External id": 229116,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6600, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096435812.061, "dur": 253.048, + "args": { + "External id": 229117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096436100.317, "dur": 2.320, + "args": { + "External id": 229118,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6602, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5327096436190.176, "dur": 17172.006, + "args": { + "External id": 229119,"Record function id": 0, "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436286.415, "dur": 6.232, + "args": { + "External id": 229120,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436295.860, "dur": 1.190, + "args": { + "External id": 229121,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436298.686, "dur": 2.077, + "args": { + "External id": 229122,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436302.188, "dur": 0.817, + "args": { + "External id": 229123,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436304.261, "dur": 0.840, + "args": { + "External id": 229124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436306.262, "dur": 0.849, + "args": { + "External id": 229125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436310.479, "dur": 0.811, + "args": { + "External id": 229126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436312.809, "dur": 1.973, + "args": { + "External id": 229127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436316.321, "dur": 0.647, + "args": { + "External id": 229128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096436318.482, "dur": 0.867, + "args": { + "External id": 229129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096436339.974, "dur": 16978.335, + "args": { + "External id": 229130,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096436356.307, "dur": 16953.984, + "args": { + "External id": 229131,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096436380.072, "dur": 15.350, + "args": { + "External id": 229132,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096436398.985, "dur": 16877.572, + "args": { + "External id": 229133,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096436401.391, "dur": 16874.482, + "args": { + "External id": 229134,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096436407.486, "dur": 5.201, + "args": { + "External id": 229135,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096436414.309, "dur": 16858.458, + "args": { + "External id": 229136,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096453499.746, "dur": 35.817, + "args": { + "External id": 229137,"Sequence number": 959147, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6621 + } + }, + { + "ph": "s", "id": 29, "pid": 2070552, "tid": 2070552, "ts": 5327096453499.746, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096453521.815, "dur": 8.989, + "args": { + "External id": 229138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096453525.851, "dur": 4.740, + "args": { + "External id": 229139,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096453599.699, "dur": 127.049, + "args": { + "External id": 229140,"Record function id": 0, "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096453730.012, "dur": 1086.159, + "args": { + "External id": 229141,"Record function id": 0, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096453777.969, "dur": 1024.788, + "args": { + "External id": 229142,"Sequence number": 959148, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6626 + } + }, + { + "ph": "s", "id": 28, "pid": 2070552, "tid": 2070552, "ts": 5327096453777.969, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096453850.089, "dur": 45.205, + "args": { + "External id": 229143,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096453908.348, "dur": 119.411, + "args": { + "External id": 229144,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096454042.379, "dur": 40.641, + "args": { + "External id": 229145,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096454089.214, "dur": 30.601, + "args": { + "External id": 229146,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096454148.526, "dur": 25.900, + "args": { + "External id": 229147,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096454190.283, "dur": 13.890, + "args": { + "External id": 229148,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096454222.409, "dur": 126.086, + "args": { + "External id": 229149,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096454272.040, "dur": 11.212, + "args": { + "External id": 229150,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096454277.219, "dur": 5.296, + "args": { + "External id": 229151,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096454286.254, "dur": 3.914, + "args": { + "External id": 229152,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096454291.581, "dur": 1.140, + "args": { + "External id": 229153,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096454297.103, "dur": 3.642, + "args": { + "External id": 229154,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096454358.229, "dur": 45.139, + "args": { + "External id": 229155,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096454433.716, "dur": 27.737, + "args": { + "External id": 229156,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096454469.633, "dur": 41.206, + "args": { + "External id": 229157,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096454517.271, "dur": 34.256, + "args": { + "External id": 229158,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096454573.069, "dur": 27.889, + "args": { + "External id": 229159,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096454606.395, "dur": 85.382, + "args": { + "External id": 229160,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096454714.233, "dur": 18.932, + "args": { + "External id": 229161,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5327096454882.877, "dur": 80.167, + "args": { + "External id": 229162,"Record function id": 0, "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096455054.437, "dur": 48.982, + "args": { + "External id": 229163,"Record function id": 0, "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5327096455113.506, "dur": 18363.372, + "args": { + "External id": 229164,"Record function id": 0, "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5327096455121.576, "dur": 815.120, + "args": { + "External id": 229165,"Record function id": 0, "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096455203.955, "dur": 9.833, + "args": { + "External id": 229166,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096455227.657, "dur": 36.659, + "args": { + "External id": 229167,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455233.144, "dur": 2.378, + "args": { + "External id": 229168,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455239.988, "dur": 0.258, + "args": { + "External id": 229169,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455241.923, "dur": 0.504, + "args": { + "External id": 229170,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455244.428, "dur": 0.155, + "args": { + "External id": 229171,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455247.032, "dur": 0.502, + "args": { + "External id": 229172,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455249.148, "dur": 0.376, + "args": { + "External id": 229173,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455251.161, "dur": 2.222, + "args": { + "External id": 229174,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455254.772, "dur": 1.732, + "args": { + "External id": 229175,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455258.039, "dur": 0.506, + "args": { + "External id": 229176,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096455279.091, "dur": 41.046, + "args": { + "External id": 229177,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096455352.609, "dur": 111.082, + "args": { + "External id": 229178,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096455363.645, "dur": 5.435, + "args": { + "External id": 229179,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096455374.275, "dur": 11.153, + "args": { + "External id": 229180,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096455378.855, "dur": 6.079, + "args": { + "External id": 229181,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455383.061, "dur": 0.533, + "args": { + "External id": 229182,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096455392.675, "dur": 27.293, + "args": { + "External id": 229183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455394.833, "dur": 0.571, + "args": { + "External id": 229184,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455397.102, "dur": 2.110, + "args": { + "External id": 229185,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455400.916, "dur": 0.424, + "args": { + "External id": 229186,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455402.666, "dur": 0.312, + "args": { + "External id": 229187,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455406.239, "dur": 1.804, + "args": { + "External id": 229188,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455409.379, "dur": 0.452, + "args": { + "External id": 229189,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455410.952, "dur": 0.375, + "args": { + "External id": 229190,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455414.118, "dur": 0.560, + "args": { + "External id": 229191,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096455416.184, "dur": 0.315, + "args": { + "External id": 229192,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096455432.556, "dur": 23.204, + "args": { + "External id": 229193,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096455516.089, "dur": 329.264, + "args": { + "External id": 229194,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096455546.489, "dur": 294.041, + "args": { + "External id": 229195,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6679, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096455556.483, "dur": 277.857, + "args": { + "External id": 229196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096455868.665, "dur": 2.360, + "args": { + "External id": 229197,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6681, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5327096455957.663, "dur": 17316.164, + "args": { + "External id": 229198,"Record function id": 0, "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456076.727, "dur": 6.676, + "args": { + "External id": 229199,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456086.978, "dur": 1.099, + "args": { + "External id": 229200,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456089.774, "dur": 1.012, + "args": { + "External id": 229201,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456092.603, "dur": 2.475, + "args": { + "External id": 229202,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456096.518, "dur": 0.684, + "args": { + "External id": 229203,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456098.380, "dur": 0.965, + "args": { + "External id": 229204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456103.177, "dur": 0.867, + "args": { + "External id": 229205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456105.523, "dur": 1.851, + "args": { + "External id": 229206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456108.746, "dur": 0.590, + "args": { + "External id": 229207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096456110.845, "dur": 0.651, + "args": { + "External id": 229208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096456131.863, "dur": 17097.695, + "args": { + "External id": 229209,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096456147.530, "dur": 17073.509, + "args": { + "External id": 229210,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096456169.803, "dur": 13.977, + "args": { + "External id": 229211,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096456187.319, "dur": 16999.757, + "args": { + "External id": 229212,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096456189.894, "dur": 16996.371, + "args": { + "External id": 229213,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096456195.892, "dur": 6.466, + "args": { + "External id": 229214,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096456203.995, "dur": 16979.177, + "args": { + "External id": 229215,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096473416.242, "dur": 36.129, + "args": { + "External id": 229216,"Sequence number": 959149, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6700 + } + }, + { + "ph": "s", "id": 27, "pid": 2070552, "tid": 2070552, "ts": 5327096473416.242, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096473438.455, "dur": 9.036, + "args": { + "External id": 229217,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096473442.276, "dur": 4.954, + "args": { + "External id": 229218,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096473514.815, "dur": 78.287, + "args": { + "External id": 229219,"Record function id": 0, "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096473594.723, "dur": 1132.670, + "args": { + "External id": 229220,"Record function id": 0, "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096473687.172, "dur": 1025.541, + "args": { + "External id": 229221,"Sequence number": 959150, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6705 + } + }, + { + "ph": "s", "id": 26, "pid": 2070552, "tid": 2070552, "ts": 5327096473687.172, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096473757.353, "dur": 45.304, + "args": { + "External id": 229222,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096473816.277, "dur": 108.817, + "args": { + "External id": 229223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096473934.929, "dur": 54.773, + "args": { + "External id": 229224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096474001.706, "dur": 34.051, + "args": { + "External id": 229225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096474061.954, "dur": 28.412, + "args": { + "External id": 229226,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096474107.941, "dur": 13.671, + "args": { + "External id": 229227,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096474140.544, "dur": 121.728, + "args": { + "External id": 229228,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096474188.941, "dur": 11.381, + "args": { + "External id": 229229,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096474194.468, "dur": 5.063, + "args": { + "External id": 229230,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096474203.021, "dur": 4.052, + "args": { + "External id": 229231,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096474208.453, "dur": 1.158, + "args": { + "External id": 229232,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096474211.846, "dur": 2.670, + "args": { + "External id": 229233,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096474272.651, "dur": 45.541, + "args": { + "External id": 229234,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096474347.982, "dur": 27.902, + "args": { + "External id": 229235,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096474382.369, "dur": 40.427, + "args": { + "External id": 229236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096474431.040, "dur": 34.861, + "args": { + "External id": 229237,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096474487.840, "dur": 26.678, + "args": { + "External id": 229238,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096474520.191, "dur": 33.371, + "args": { + "External id": 229239,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096474572.545, "dur": 19.687, + "args": { + "External id": 229240,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5327096474793.341, "dur": 76.520, + "args": { + "External id": 229241,"Record function id": 0, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096474940.636, "dur": 61.926, + "args": { + "External id": 229242,"Record function id": 0, "Ev Idx": 6726 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5327096475013.531, "dur": 18325.578, + "args": { + "External id": 229243,"Record function id": 0, "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5327096475023.207, "dur": 862.057, + "args": { + "External id": 229244,"Record function id": 0, "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096475104.172, "dur": 9.629, + "args": { + "External id": 229245,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096475128.133, "dur": 36.429, + "args": { + "External id": 229246,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475133.838, "dur": 2.329, + "args": { + "External id": 229247,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475140.785, "dur": 0.516, + "args": { + "External id": 229248,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475142.942, "dur": 0.435, + "args": { + "External id": 229249,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475144.873, "dur": 0.244, + "args": { + "External id": 229250,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475148.152, "dur": 0.312, + "args": { + "External id": 229251,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475150.001, "dur": 0.421, + "args": { + "External id": 229252,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475151.912, "dur": 3.211, + "args": { + "External id": 229253,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475156.683, "dur": 0.287, + "args": { + "External id": 229254,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475158.337, "dur": 0.353, + "args": { + "External id": 229255,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096475175.829, "dur": 42.677, + "args": { + "External id": 229256,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096475251.059, "dur": 112.441, + "args": { + "External id": 229257,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096475261.625, "dur": 4.220, + "args": { + "External id": 229258,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096475270.895, "dur": 10.397, + "args": { + "External id": 229259,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096475275.341, "dur": 5.550, + "args": { + "External id": 229260,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475279.154, "dur": 0.471, + "args": { + "External id": 229261,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096475288.496, "dur": 29.133, + "args": { + "External id": 229262,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475290.785, "dur": 1.901, + "args": { + "External id": 229263,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475294.387, "dur": 0.472, + "args": { + "External id": 229264,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475296.308, "dur": 0.293, + "args": { + "External id": 229265,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475300.162, "dur": 1.644, + "args": { + "External id": 229266,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475303.286, "dur": 0.166, + "args": { + "External id": 229267,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475305.034, "dur": 0.288, + "args": { + "External id": 229268,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475308.287, "dur": 0.153, + "args": { + "External id": 229269,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475309.865, "dur": 0.528, + "args": { + "External id": 229270,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096475311.983, "dur": 1.839, + "args": { + "External id": 229271,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096475332.358, "dur": 23.458, + "args": { + "External id": 229272,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096475416.357, "dur": 368.937, + "args": { + "External id": 229273,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096475447.310, "dur": 332.263, + "args": { + "External id": 229274,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6758, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096475457.557, "dur": 315.976, + "args": { + "External id": 229275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096475811.761, "dur": 2.213, + "args": { + "External id": 229276,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6760, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5327096475907.862, "dur": 17224.408, + "args": { + "External id": 229277,"Record function id": 0, "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476029.099, "dur": 6.833, + "args": { + "External id": 229278,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476039.919, "dur": 1.132, + "args": { + "External id": 229279,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476042.781, "dur": 2.676, + "args": { + "External id": 229280,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476047.190, "dur": 0.841, + "args": { + "External id": 229281,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476049.490, "dur": 0.936, + "args": { + "External id": 229282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476051.724, "dur": 0.616, + "args": { + "External id": 229283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476069.625, "dur": 0.761, + "args": { + "External id": 229284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476072.931, "dur": 1.661, + "args": { + "External id": 229285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476076.216, "dur": 0.646, + "args": { + "External id": 229286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096476078.195, "dur": 0.548, + "args": { + "External id": 229287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096476102.345, "dur": 16985.123, + "args": { + "External id": 229288,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096476118.552, "dur": 16960.639, + "args": { + "External id": 229289,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096476136.743, "dur": 14.346, + "args": { + "External id": 229290,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096476154.596, "dur": 16888.669, + "args": { + "External id": 229291,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096476157.109, "dur": 16885.517, + "args": { + "External id": 229292,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096476162.692, "dur": 5.914, + "args": { + "External id": 229293,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096476170.292, "dur": 16869.327, + "args": { + "External id": 229294,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096493275.622, "dur": 38.812, + "args": { + "External id": 229295,"Sequence number": 959151, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6779 + } + }, + { + "ph": "s", "id": 25, "pid": 2070552, "tid": 2070552, "ts": 5327096493275.622, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096493300.246, "dur": 9.057, + "args": { + "External id": 229296,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096493304.192, "dur": 4.832, + "args": { + "External id": 229297,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096493377.609, "dur": 79.637, + "args": { + "External id": 229298,"Record function id": 0, "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096493459.201, "dur": 1055.181, + "args": { + "External id": 229299,"Record function id": 0, "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096493501.060, "dur": 1000.113, + "args": { + "External id": 229300,"Sequence number": 959152, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6784 + } + }, + { + "ph": "s", "id": 24, "pid": 2070552, "tid": 2070552, "ts": 5327096493501.060, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096493566.150, "dur": 41.253, + "args": { + "External id": 229301,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096493656.826, "dur": 96.426, + "args": { + "External id": 229302,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096493765.816, "dur": 37.544, + "args": { + "External id": 229303,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096493811.240, "dur": 30.413, + "args": { + "External id": 229304,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096493873.506, "dur": 27.446, + "args": { + "External id": 229305,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096493915.726, "dur": 14.533, + "args": { + "External id": 229306,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096493948.893, "dur": 147.917, + "args": { + "External id": 229307,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096494017.581, "dur": 12.774, + "args": { + "External id": 229308,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494023.180, "dur": 6.163, + "args": { + "External id": 229309,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096494033.303, "dur": 3.786, + "args": { + "External id": 229310,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096494038.623, "dur": 1.023, + "args": { + "External id": 229311,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096494042.188, "dur": 4.195, + "args": { + "External id": 229312,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096494107.315, "dur": 47.986, + "args": { + "External id": 229313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096494187.088, "dur": 28.948, + "args": { + "External id": 229314,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096494224.051, "dur": 41.236, + "args": { + "External id": 229315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096494275.946, "dur": 34.935, + "args": { + "External id": 229316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096494335.117, "dur": 25.042, + "args": { + "External id": 229317,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096494365.201, "dur": 33.327, + "args": { + "External id": 229318,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096494416.137, "dur": 18.806, + "args": { + "External id": 229319,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5327096494577.197, "dur": 121.906, + "args": { + "External id": 229320,"Record function id": 0, "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096494775.504, "dur": 47.338, + "args": { + "External id": 229321,"Record function id": 0, "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5327096494832.499, "dur": 18276.361, + "args": { + "External id": 229322,"Record function id": 0, "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5327096494840.069, "dur": 768.887, + "args": { + "External id": 229323,"Record function id": 0, "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096494920.733, "dur": 8.668, + "args": { + "External id": 229324,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096494944.343, "dur": 51.238, + "args": { + "External id": 229325,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494949.535, "dur": 2.290, + "args": { + "External id": 229326,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494955.432, "dur": 0.601, + "args": { + "External id": 229327,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494957.110, "dur": 0.440, + "args": { + "External id": 229328,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494958.970, "dur": 0.945, + "args": { + "External id": 229329,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494962.903, "dur": 0.468, + "args": { + "External id": 229330,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494964.190, "dur": 0.557, + "args": { + "External id": 229331,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494965.796, "dur": 3.191, + "args": { + "External id": 229332,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494969.894, "dur": 0.234, + "args": { + "External id": 229333,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096494971.901, "dur": 0.344, + "args": { + "External id": 229334,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096495008.754, "dur": 45.018, + "args": { + "External id": 229335,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096495089.096, "dur": 109.390, + "args": { + "External id": 229336,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096495100.223, "dur": 5.246, + "args": { + "External id": 229337,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096495110.733, "dur": 10.482, + "args": { + "External id": 229338,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096495115.396, "dur": 5.396, + "args": { + "External id": 229339,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495118.799, "dur": 0.567, + "args": { + "External id": 229340,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096495127.782, "dur": 27.606, + "args": { + "External id": 229341,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495129.721, "dur": 2.120, + "args": { + "External id": 229342,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495133.335, "dur": 0.533, + "args": { + "External id": 229343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495135.487, "dur": 0.514, + "args": { + "External id": 229344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495138.794, "dur": 2.285, + "args": { + "External id": 229345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495142.415, "dur": 0.557, + "args": { + "External id": 229346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495144.060, "dur": 0.252, + "args": { + "External id": 229347,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495147.137, "dur": 0.544, + "args": { + "External id": 229348,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495148.993, "dur": 0.259, + "args": { + "External id": 229349,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495150.390, "dur": 1.760, + "args": { + "External id": 229350,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096495168.514, "dur": 22.186, + "args": { + "External id": 229351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096495251.842, "dur": 276.846, + "args": { + "External id": 229352,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096495281.721, "dur": 242.592, + "args": { + "External id": 229353,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6837, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096495291.954, "dur": 227.033, + "args": { + "External id": 229354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096495548.790, "dur": 2.158, + "args": { + "External id": 229355,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6839, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5327096495671.430, "dur": 17234.316, + "args": { + "External id": 229356,"Record function id": 0, "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495771.735, "dur": 6.182, + "args": { + "External id": 229357,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495781.667, "dur": 1.049, + "args": { + "External id": 229358,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495784.356, "dur": 2.054, + "args": { + "External id": 229359,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495788.533, "dur": 0.736, + "args": { + "External id": 229360,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495790.663, "dur": 0.733, + "args": { + "External id": 229361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495792.595, "dur": 0.835, + "args": { + "External id": 229362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495796.622, "dur": 0.740, + "args": { + "External id": 229363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495798.726, "dur": 1.624, + "args": { + "External id": 229364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495801.617, "dur": 0.812, + "args": { + "External id": 229365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096495803.803, "dur": 0.430, + "args": { + "External id": 229366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096495823.624, "dur": 17043.817, + "args": { + "External id": 229367,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096495838.663, "dur": 17022.205, + "args": { + "External id": 229368,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096495861.150, "dur": 15.465, + "args": { + "External id": 229369,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096495880.221, "dur": 16949.797, + "args": { + "External id": 229370,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096495882.665, "dur": 16946.817, + "args": { + "External id": 229371,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096495887.891, "dur": 5.115, + "args": { + "External id": 229372,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096495894.494, "dur": 16932.282, + "args": { + "External id": 229373,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096513048.586, "dur": 35.713, + "args": { + "External id": 229374,"Sequence number": 959153, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6858 + } + }, + { + "ph": "s", "id": 23, "pid": 2070552, "tid": 2070552, "ts": 5327096513048.586, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096513071.261, "dur": 7.975, + "args": { + "External id": 229375,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096513074.482, "dur": 4.414, + "args": { + "External id": 229376,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096513171.605, "dur": 81.148, + "args": { + "External id": 229377,"Record function id": 0, "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096513255.296, "dur": 1064.053, + "args": { + "External id": 229378,"Record function id": 0, "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096513296.913, "dur": 1008.036, + "args": { + "External id": 229379,"Sequence number": 959154, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6863 + } + }, + { + "ph": "s", "id": 22, "pid": 2070552, "tid": 2070552, "ts": 5327096513296.913, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096513363.237, "dur": 43.688, + "args": { + "External id": 229380,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096513419.396, "dur": 102.964, + "args": { + "External id": 229381,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096513533.239, "dur": 38.186, + "args": { + "External id": 229382,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096513577.468, "dur": 30.810, + "args": { + "External id": 229383,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096513673.844, "dur": 29.167, + "args": { + "External id": 229384,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096513720.324, "dur": 14.722, + "args": { + "External id": 229385,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096513753.867, "dur": 123.834, + "args": { + "External id": 229386,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096513801.360, "dur": 12.146, + "args": { + "External id": 229387,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096513806.229, "dur": 6.538, + "args": { + "External id": 229388,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096513816.378, "dur": 4.588, + "args": { + "External id": 229389,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096513822.148, "dur": 0.845, + "args": { + "External id": 229390,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096513826.472, "dur": 3.107, + "args": { + "External id": 229391,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096513888.130, "dur": 50.667, + "args": { + "External id": 229392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096513968.318, "dur": 46.562, + "args": { + "External id": 229393,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096514025.692, "dur": 44.862, + "args": { + "External id": 229394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096514076.940, "dur": 35.027, + "args": { + "External id": 229395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096514135.343, "dur": 27.105, + "args": { + "External id": 229396,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096514167.641, "dur": 33.961, + "args": { + "External id": 229397,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096514219.136, "dur": 19.837, + "args": { + "External id": 229398,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5327096514382.543, "dur": 77.210, + "args": { + "External id": 229399,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096514530.960, "dur": 45.293, + "args": { + "External id": 229400,"Record function id": 0, "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5327096514585.732, "dur": 18460.328, + "args": { + "External id": 229401,"Record function id": 0, "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5327096514594.198, "dur": 831.824, + "args": { + "External id": 229402,"Record function id": 0, "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096514719.539, "dur": 9.793, + "args": { + "External id": 229403,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096514744.746, "dur": 33.077, + "args": { + "External id": 229404,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514750.234, "dur": 2.183, + "args": { + "External id": 229405,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514756.530, "dur": 0.277, + "args": { + "External id": 229406,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514757.866, "dur": 0.353, + "args": { + "External id": 229407,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514759.733, "dur": 0.509, + "args": { + "External id": 229408,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514762.692, "dur": 0.318, + "args": { + "External id": 229409,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514763.932, "dur": 0.449, + "args": { + "External id": 229410,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514765.659, "dur": 3.125, + "args": { + "External id": 229411,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514770.446, "dur": 0.193, + "args": { + "External id": 229412,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514771.627, "dur": 0.379, + "args": { + "External id": 229413,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096514788.389, "dur": 40.940, + "args": { + "External id": 229414,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096514862.360, "dur": 106.537, + "args": { + "External id": 229415,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096514872.842, "dur": 3.348, + "args": { + "External id": 229416,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096514881.142, "dur": 9.780, + "args": { + "External id": 229417,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096514885.533, "dur": 4.965, + "args": { + "External id": 229418,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514888.672, "dur": 0.646, + "args": { + "External id": 229419,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096514898.072, "dur": 25.831, + "args": { + "External id": 229420,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514900.670, "dur": 1.948, + "args": { + "External id": 229421,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514904.238, "dur": 0.647, + "args": { + "External id": 229422,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514905.815, "dur": 0.382, + "args": { + "External id": 229423,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514909.094, "dur": 1.106, + "args": { + "External id": 229424,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514911.466, "dur": 0.174, + "args": { + "External id": 229425,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514912.614, "dur": 0.168, + "args": { + "External id": 229426,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514914.957, "dur": 0.176, + "args": { + "External id": 229427,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514916.408, "dur": 0.167, + "args": { + "External id": 229428,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096514917.819, "dur": 2.331, + "args": { + "External id": 229429,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096514938.751, "dur": 22.195, + "args": { + "External id": 229430,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096515042.090, "dur": 295.566, + "args": { + "External id": 229431,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096515071.854, "dur": 261.342, + "args": { + "External id": 229432,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6916, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096515081.780, "dur": 246.168, + "args": { + "External id": 229433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096515359.342, "dur": 2.214, + "args": { + "External id": 229434,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6918, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5327096515445.193, "dur": 17386.466, + "args": { + "External id": 229435,"Record function id": 0, "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515540.082, "dur": 5.935, + "args": { + "External id": 229436,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515549.389, "dur": 0.977, + "args": { + "External id": 229437,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515552.066, "dur": 1.867, + "args": { + "External id": 229438,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515555.415, "dur": 0.796, + "args": { + "External id": 229439,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515557.503, "dur": 1.034, + "args": { + "External id": 229440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515559.899, "dur": 0.689, + "args": { + "External id": 229441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515564.300, "dur": 0.606, + "args": { + "External id": 229442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515566.342, "dur": 1.583, + "args": { + "External id": 229443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515569.259, "dur": 0.733, + "args": { + "External id": 229444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096515571.273, "dur": 0.799, + "args": { + "External id": 229445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096515590.953, "dur": 17195.283, + "args": { + "External id": 229446,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096515605.622, "dur": 17172.389, + "args": { + "External id": 229447,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096515666.945, "dur": 15.604, + "args": { + "External id": 229448,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096515686.875, "dur": 17057.431, + "args": { + "External id": 229449,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096515689.529, "dur": 17053.977, + "args": { + "External id": 229450,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096515694.940, "dur": 6.363, + "args": { + "External id": 229451,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096515703.197, "dur": 17037.419, + "args": { + "External id": 229452,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096532968.763, "dur": 49.383, + "args": { + "External id": 229453,"Sequence number": 959155, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6937 + } + }, + { + "ph": "s", "id": 21, "pid": 2070552, "tid": 2070552, "ts": 5327096532968.763, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096533003.687, "dur": 9.511, + "args": { + "External id": 229454,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096533007.093, "dur": 5.711, + "args": { + "External id": 229455,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096533083.414, "dur": 80.935, + "args": { + "External id": 229456,"Record function id": 0, "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096533165.960, "dur": 1071.431, + "args": { + "External id": 229457,"Record function id": 0, "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096533206.355, "dur": 1018.104, + "args": { + "External id": 229458,"Sequence number": 959156, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6942 + } + }, + { + "ph": "s", "id": 20, "pid": 2070552, "tid": 2070552, "ts": 5327096533206.355, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096533273.779, "dur": 42.955, + "args": { + "External id": 229459,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096533328.186, "dur": 101.611, + "args": { + "External id": 229460,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096533438.960, "dur": 36.491, + "args": { + "External id": 229461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096533483.926, "dur": 30.958, + "args": { + "External id": 229462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096533540.937, "dur": 24.947, + "args": { + "External id": 229463,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096533583.046, "dur": 14.387, + "args": { + "External id": 229464,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096533618.921, "dur": 167.233, + "args": { + "External id": 229465,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096533708.809, "dur": 13.583, + "args": { + "External id": 229466,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096533714.118, "dur": 7.341, + "args": { + "External id": 229467,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096533724.992, "dur": 4.508, + "args": { + "External id": 229468,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096533730.617, "dur": 1.448, + "args": { + "External id": 229469,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096533734.177, "dur": 2.356, + "args": { + "External id": 229470,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096533797.994, "dur": 53.214, + "args": { + "External id": 229471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096533882.768, "dur": 28.839, + "args": { + "External id": 229472,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096533919.511, "dur": 41.670, + "args": { + "External id": 229473,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096533969.635, "dur": 55.087, + "args": { + "External id": 229474,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096534051.807, "dur": 26.475, + "args": { + "External id": 229475,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096534085.271, "dur": 35.080, + "args": { + "External id": 229476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096534141.146, "dur": 18.045, + "args": { + "External id": 229477,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5327096534301.027, "dur": 76.918, + "args": { + "External id": 229478,"Record function id": 0, "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096534449.141, "dur": 48.894, + "args": { + "External id": 229479,"Record function id": 0, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5327096534507.415, "dur": 18313.289, + "args": { + "External id": 229480,"Record function id": 0, "Ev Idx": 6964 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5327096534514.508, "dur": 831.839, + "args": { + "External id": 229481,"Record function id": 0, "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096534595.576, "dur": 7.457, + "args": { + "External id": 229482,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096534616.504, "dur": 73.157, + "args": { + "External id": 229483,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534661.694, "dur": 2.246, + "args": { + "External id": 229484,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534668.055, "dur": 0.269, + "args": { + "External id": 229485,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534669.770, "dur": 0.161, + "args": { + "External id": 229486,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534670.870, "dur": 0.937, + "args": { + "External id": 229487,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534675.274, "dur": 0.421, + "args": { + "External id": 229488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534676.937, "dur": 0.358, + "args": { + "External id": 229489,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534678.353, "dur": 2.579, + "args": { + "External id": 229490,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534681.867, "dur": 0.660, + "args": { + "External id": 229491,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534683.620, "dur": 0.445, + "args": { + "External id": 229492,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096534702.401, "dur": 41.780, + "args": { + "External id": 229493,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096534778.973, "dur": 119.259, + "args": { + "External id": 229494,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096534790.399, "dur": 5.408, + "args": { + "External id": 229495,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096534801.162, "dur": 10.385, + "args": { + "External id": 229496,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096534805.607, "dur": 5.498, + "args": { + "External id": 229497,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534809.046, "dur": 0.765, + "args": { + "External id": 229498,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096534824.127, "dur": 28.753, + "args": { + "External id": 229499,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534826.092, "dur": 2.140, + "args": { + "External id": 229500,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534829.640, "dur": 0.943, + "args": { + "External id": 229501,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534832.057, "dur": 0.617, + "args": { + "External id": 229502,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534835.926, "dur": 0.401, + "args": { + "External id": 229503,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534837.778, "dur": 0.736, + "args": { + "External id": 229504,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534839.456, "dur": 0.655, + "args": { + "External id": 229505,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534842.873, "dur": 0.831, + "args": { + "External id": 229506,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534845.164, "dur": 0.732, + "args": { + "External id": 229507,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096534847.053, "dur": 1.771, + "args": { + "External id": 229508,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096534866.048, "dur": 23.404, + "args": { + "External id": 229509,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096534952.162, "dur": 303.766, + "args": { + "External id": 229510,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096534997.303, "dur": 253.996, + "args": { + "External id": 229511,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6995, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096535008.629, "dur": 237.255, + "args": { + "External id": 229512,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096535278.606, "dur": 2.046, + "args": { + "External id": 229513,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6997, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5327096535366.630, "dur": 17206.006, + "args": { + "External id": 229514,"Record function id": 0, "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535463.321, "dur": 6.272, + "args": { + "External id": 229515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535473.071, "dur": 0.956, + "args": { + "External id": 229516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535475.642, "dur": 0.880, + "args": { + "External id": 229517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535478.516, "dur": 1.162, + "args": { + "External id": 229518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535480.866, "dur": 1.294, + "args": { + "External id": 229519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535483.466, "dur": 0.913, + "args": { + "External id": 229520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535487.804, "dur": 1.439, + "args": { + "External id": 229521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535490.766, "dur": 1.791, + "args": { + "External id": 229522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535493.718, "dur": 0.959, + "args": { + "External id": 229523,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096535495.869, "dur": 0.823, + "args": { + "External id": 229524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096535515.691, "dur": 17011.120, + "args": { + "External id": 229525,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096535530.772, "dur": 16988.244, + "args": { + "External id": 229526,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096535550.825, "dur": 14.289, + "args": { + "External id": 229527,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096535568.752, "dur": 16916.162, + "args": { + "External id": 229528,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096535571.288, "dur": 16912.918, + "args": { + "External id": 229529,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096535576.626, "dur": 5.567, + "args": { + "External id": 229530,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096535583.592, "dur": 16897.599, + "args": { + "External id": 229531,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096552754.449, "dur": 38.069, + "args": { + "External id": 229532,"Sequence number": 959157, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7016 + } + }, + { + "ph": "s", "id": 19, "pid": 2070552, "tid": 2070552, "ts": 5327096552754.449, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096552777.706, "dur": 9.941, + "args": { + "External id": 229533,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096552781.893, "dur": 5.321, + "args": { + "External id": 229534,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096552858.459, "dur": 86.448, + "args": { + "External id": 229535,"Record function id": 0, "Ev Idx": 7019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096552946.430, "dur": 1082.206, + "args": { + "External id": 229536,"Record function id": 0, "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096553002.654, "dur": 1011.210, + "args": { + "External id": 229537,"Sequence number": 959158, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7021 + } + }, + { + "ph": "s", "id": 18, "pid": 2070552, "tid": 2070552, "ts": 5327096553002.654, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096553071.730, "dur": 45.965, + "args": { + "External id": 229538,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096553133.559, "dur": 105.988, + "args": { + "External id": 229539,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096553248.664, "dur": 36.788, + "args": { + "External id": 229540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096553295.060, "dur": 31.695, + "args": { + "External id": 229541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096553353.053, "dur": 24.556, + "args": { + "External id": 229542,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096553394.880, "dur": 13.841, + "args": { + "External id": 229543,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096553426.774, "dur": 127.163, + "args": { + "External id": 229544,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096553477.567, "dur": 11.905, + "args": { + "External id": 229545,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096553482.684, "dur": 6.063, + "args": { + "External id": 229546,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096553491.750, "dur": 4.844, + "args": { + "External id": 229547,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096553497.698, "dur": 1.666, + "args": { + "External id": 229548,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096553501.411, "dur": 3.398, + "args": { + "External id": 229549,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096553565.175, "dur": 43.229, + "args": { + "External id": 229550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096553679.215, "dur": 32.019, + "args": { + "External id": 229551,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096553720.087, "dur": 44.736, + "args": { + "External id": 229552,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096553773.446, "dur": 34.014, + "args": { + "External id": 229553,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096553830.938, "dur": 23.637, + "args": { + "External id": 229554,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096553861.004, "dur": 33.684, + "args": { + "External id": 229555,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096553913.433, "dur": 17.825, + "args": { + "External id": 229556,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5327096554093.168, "dur": 76.756, + "args": { + "External id": 229557,"Record function id": 0, "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096554239.863, "dur": 45.457, + "args": { + "External id": 229558,"Record function id": 0, "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5327096554295.068, "dur": 18457.887, + "args": { + "External id": 229559,"Record function id": 0, "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5327096554304.422, "dur": 826.389, + "args": { + "External id": 229560,"Record function id": 0, "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096554387.964, "dur": 8.732, + "args": { + "External id": 229561,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096554411.048, "dur": 31.827, + "args": { + "External id": 229562,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554416.651, "dur": 2.240, + "args": { + "External id": 229563,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554422.952, "dur": 0.647, + "args": { + "External id": 229564,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554424.826, "dur": 0.673, + "args": { + "External id": 229565,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554426.726, "dur": 0.629, + "args": { + "External id": 229566,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554429.382, "dur": 0.903, + "args": { + "External id": 229567,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554431.413, "dur": 0.844, + "args": { + "External id": 229568,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554433.451, "dur": 1.783, + "args": { + "External id": 229569,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554436.297, "dur": 0.401, + "args": { + "External id": 229570,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554437.438, "dur": 0.412, + "args": { + "External id": 229571,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096554453.497, "dur": 39.571, + "args": { + "External id": 229572,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096554525.806, "dur": 150.718, + "args": { + "External id": 229573,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096554535.772, "dur": 3.878, + "args": { + "External id": 229574,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096554544.852, "dur": 10.448, + "args": { + "External id": 229575,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096554549.714, "dur": 5.144, + "args": { + "External id": 229576,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554552.838, "dur": 0.709, + "args": { + "External id": 229577,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096554561.874, "dur": 24.886, + "args": { + "External id": 229578,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554563.598, "dur": 2.372, + "args": { + "External id": 229579,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554567.004, "dur": 0.793, + "args": { + "External id": 229580,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554568.911, "dur": 0.466, + "args": { + "External id": 229581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554571.849, "dur": 0.729, + "args": { + "External id": 229582,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554573.487, "dur": 0.540, + "args": { + "External id": 229583,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554575.162, "dur": 0.394, + "args": { + "External id": 229584,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554577.836, "dur": 0.793, + "args": { + "External id": 229585,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554579.545, "dur": 0.594, + "args": { + "External id": 229586,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096554581.134, "dur": 2.053, + "args": { + "External id": 229587,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096554601.450, "dur": 64.981, + "args": { + "External id": 229588,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096554731.933, "dur": 307.002, + "args": { + "External id": 229589,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096554761.845, "dur": 272.141, + "args": { + "External id": 229590,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7074, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096554771.785, "dur": 255.796, + "args": { + "External id": 229591,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096555061.770, "dur": 2.183, + "args": { + "External id": 229592,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7076, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5327096555151.809, "dur": 17368.766, + "args": { + "External id": 229593,"Record function id": 0, "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555248.852, "dur": 6.292, + "args": { + "External id": 229594,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555258.616, "dur": 1.041, + "args": { + "External id": 229595,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555261.137, "dur": 1.213, + "args": { + "External id": 229596,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555264.238, "dur": 1.137, + "args": { + "External id": 229597,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555266.637, "dur": 1.231, + "args": { + "External id": 229598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555269.185, "dur": 0.974, + "args": { + "External id": 229599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555273.558, "dur": 1.279, + "args": { + "External id": 229600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555276.295, "dur": 2.318, + "args": { + "External id": 229601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555279.957, "dur": 0.991, + "args": { + "External id": 229602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096555282.197, "dur": 1.191, + "args": { + "External id": 229603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096555303.480, "dur": 17173.616, + "args": { + "External id": 229604,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096555319.083, "dur": 17150.627, + "args": { + "External id": 229605,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096555340.603, "dur": 14.325, + "args": { + "External id": 229606,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096555358.450, "dur": 17078.796, + "args": { + "External id": 229607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096555360.938, "dur": 17075.630, + "args": { + "External id": 229608,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096555366.695, "dur": 6.074, + "args": { + "External id": 229609,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096555374.246, "dur": 17059.096, + "args": { + "External id": 229610,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096572685.792, "dur": 38.797, + "args": { + "External id": 229611,"Sequence number": 959159, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7095 + } + }, + { + "ph": "s", "id": 17, "pid": 2070552, "tid": 2070552, "ts": 5327096572685.792, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096572709.899, "dur": 9.824, + "args": { + "External id": 229612,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096572713.418, "dur": 5.851, + "args": { + "External id": 229613,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096572791.000, "dur": 83.622, + "args": { + "External id": 229614,"Record function id": 0, "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096572876.491, "dur": 1081.629, + "args": { + "External id": 229615,"Record function id": 0, "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096572917.486, "dur": 1027.788, + "args": { + "External id": 229616,"Sequence number": 959160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7100 + } + }, + { + "ph": "s", "id": 16, "pid": 2070552, "tid": 2070552, "ts": 5327096572917.486, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096572998.378, "dur": 46.466, + "args": { + "External id": 229617,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096573060.007, "dur": 105.605, + "args": { + "External id": 229618,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096573175.003, "dur": 37.982, + "args": { + "External id": 229619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096573222.600, "dur": 32.853, + "args": { + "External id": 229620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096573286.484, "dur": 28.189, + "args": { + "External id": 229621,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096573330.329, "dur": 13.589, + "args": { + "External id": 229622,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096573362.518, "dur": 128.687, + "args": { + "External id": 229623,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096573411.764, "dur": 11.701, + "args": { + "External id": 229624,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096573416.775, "dur": 5.991, + "args": { + "External id": 229625,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096573428.106, "dur": 3.943, + "args": { + "External id": 229626,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096573433.102, "dur": 1.240, + "args": { + "External id": 229627,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096573438.666, "dur": 3.321, + "args": { + "External id": 229628,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096573502.075, "dur": 44.281, + "args": { + "External id": 229629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096573578.283, "dur": 27.954, + "args": { + "External id": 229630,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096573614.425, "dur": 82.499, + "args": { + "External id": 229631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096573708.032, "dur": 36.128, + "args": { + "External id": 229632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096573767.930, "dur": 31.181, + "args": { + "External id": 229633,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096573804.939, "dur": 35.072, + "args": { + "External id": 229634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096573860.301, "dur": 18.734, + "args": { + "External id": 229635,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5327096574039.264, "dur": 75.556, + "args": { + "External id": 229636,"Record function id": 0, "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096574186.711, "dur": 47.085, + "args": { + "External id": 229637,"Record function id": 0, "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5327096574243.266, "dur": 18442.808, + "args": { + "External id": 229638,"Record function id": 0, "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5327096574251.297, "dur": 860.693, + "args": { + "External id": 229639,"Record function id": 0, "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096574336.876, "dur": 8.913, + "args": { + "External id": 229640,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096574360.074, "dur": 32.904, + "args": { + "External id": 229641,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574365.478, "dur": 2.482, + "args": { + "External id": 229642,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574371.827, "dur": 0.456, + "args": { + "External id": 229643,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574373.243, "dur": 0.534, + "args": { + "External id": 229644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574374.747, "dur": 0.757, + "args": { + "External id": 229645,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574378.539, "dur": 0.641, + "args": { + "External id": 229646,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574380.174, "dur": 0.382, + "args": { + "External id": 229647,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574381.891, "dur": 2.460, + "args": { + "External id": 229648,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574385.351, "dur": 0.633, + "args": { + "External id": 229649,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574386.877, "dur": 0.387, + "args": { + "External id": 229650,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096574404.116, "dur": 39.273, + "args": { + "External id": 229651,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096574475.612, "dur": 103.891, + "args": { + "External id": 229652,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096574485.852, "dur": 3.643, + "args": { + "External id": 229653,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096574494.575, "dur": 10.262, + "args": { + "External id": 229654,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096574499.295, "dur": 5.144, + "args": { + "External id": 229655,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574502.605, "dur": 0.634, + "args": { + "External id": 229656,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096574511.107, "dur": 25.419, + "args": { + "External id": 229657,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574512.763, "dur": 2.818, + "args": { + "External id": 229658,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574516.616, "dur": 0.592, + "args": { + "External id": 229659,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574518.049, "dur": 0.707, + "args": { + "External id": 229660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574521.721, "dur": 0.432, + "args": { + "External id": 229661,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574523.235, "dur": 0.341, + "args": { + "External id": 229662,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574524.494, "dur": 0.629, + "args": { + "External id": 229663,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574527.412, "dur": 0.878, + "args": { + "External id": 229664,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574529.393, "dur": 0.666, + "args": { + "External id": 229665,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096574530.852, "dur": 2.016, + "args": { + "External id": 229666,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096574548.837, "dur": 22.482, + "args": { + "External id": 229667,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096574672.927, "dur": 336.956, + "args": { + "External id": 229668,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096574702.910, "dur": 301.805, + "args": { + "External id": 229669,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7153, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096574713.462, "dur": 284.197, + "args": { + "External id": 229670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096575034.580, "dur": 2.840, + "args": { + "External id": 229671,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7155, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5327096575132.302, "dur": 17318.451, + "args": { + "External id": 229672,"Record function id": 0, "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575233.837, "dur": 6.902, + "args": { + "External id": 229673,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575244.361, "dur": 1.298, + "args": { + "External id": 229674,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575247.550, "dur": 1.086, + "args": { + "External id": 229675,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575250.539, "dur": 0.852, + "args": { + "External id": 229676,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575252.836, "dur": 1.111, + "args": { + "External id": 229677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575255.437, "dur": 1.068, + "args": { + "External id": 229678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575260.102, "dur": 1.129, + "args": { + "External id": 229679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575262.500, "dur": 2.100, + "args": { + "External id": 229680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575265.801, "dur": 1.081, + "args": { + "External id": 229681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096575268.106, "dur": 0.988, + "args": { + "External id": 229682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096575288.394, "dur": 17118.346, + "args": { + "External id": 229683,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096575303.712, "dur": 17094.733, + "args": { + "External id": 229684,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096575324.637, "dur": 13.861, + "args": { + "External id": 229685,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096575341.885, "dur": 17020.862, + "args": { + "External id": 229686,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096575344.500, "dur": 17017.440, + "args": { + "External id": 229687,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096575350.546, "dur": 5.385, + "args": { + "External id": 229688,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096575357.608, "dur": 17000.938, + "args": { + "External id": 229689,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096592591.410, "dur": 66.382, + "args": { + "External id": 229690,"Sequence number": 959161, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7174 + } + }, + { + "ph": "s", "id": 15, "pid": 2070552, "tid": 2070552, "ts": 5327096592591.410, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096592613.794, "dur": 38.329, + "args": { + "External id": 229691,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096592617.292, "dur": 33.958, + "args": { + "External id": 229692,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096592725.278, "dur": 83.744, + "args": { + "External id": 229693,"Record function id": 0, "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096592810.645, "dur": 1056.748, + "args": { + "External id": 229694,"Record function id": 0, "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096592851.375, "dur": 1003.012, + "args": { + "External id": 229695,"Sequence number": 959162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7179 + } + }, + { + "ph": "s", "id": 14, "pid": 2070552, "tid": 2070552, "ts": 5327096592851.375, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096592917.061, "dur": 44.044, + "args": { + "External id": 229696,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096592973.734, "dur": 117.652, + "args": { + "External id": 229697,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096593103.260, "dur": 37.779, + "args": { + "External id": 229698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096593148.980, "dur": 30.697, + "args": { + "External id": 229699,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096593206.187, "dur": 27.379, + "args": { + "External id": 229700,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096593251.234, "dur": 14.258, + "args": { + "External id": 229701,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096593282.941, "dur": 127.116, + "args": { + "External id": 229702,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096593334.320, "dur": 11.984, + "args": { + "External id": 229703,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096593339.584, "dur": 5.886, + "args": { + "External id": 229704,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096593348.762, "dur": 4.453, + "args": { + "External id": 229705,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096593354.456, "dur": 1.004, + "args": { + "External id": 229706,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096593357.871, "dur": 3.363, + "args": { + "External id": 229707,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096593419.944, "dur": 43.961, + "args": { + "External id": 229708,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096593492.240, "dur": 31.120, + "args": { + "External id": 229709,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096593530.888, "dur": 39.934, + "args": { + "External id": 229710,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096593578.924, "dur": 33.932, + "args": { + "External id": 229711,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096593674.542, "dur": 28.223, + "args": { + "External id": 229712,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096593710.480, "dur": 39.687, + "args": { + "External id": 229713,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096593769.538, "dur": 19.735, + "args": { + "External id": 229714,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5327096593930.154, "dur": 93.173, + "args": { + "External id": 229715,"Record function id": 0, "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096594099.842, "dur": 48.719, + "args": { + "External id": 229716,"Record function id": 0, "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5327096594158.025, "dur": 18355.934, + "args": { + "External id": 229717,"Record function id": 0, "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5327096594167.228, "dur": 845.619, + "args": { + "External id": 229718,"Record function id": 0, "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096594249.314, "dur": 9.114, + "args": { + "External id": 229719,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096594272.554, "dur": 36.185, + "args": { + "External id": 229720,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594277.714, "dur": 2.119, + "args": { + "External id": 229721,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594283.950, "dur": 0.602, + "args": { + "External id": 229722,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594285.506, "dur": 0.720, + "args": { + "External id": 229723,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594287.205, "dur": 0.621, + "args": { + "External id": 229724,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594291.686, "dur": 0.384, + "args": { + "External id": 229725,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594293.438, "dur": 0.895, + "args": { + "External id": 229726,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594296.038, "dur": 2.722, + "args": { + "External id": 229727,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594300.079, "dur": 0.949, + "args": { + "External id": 229728,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594301.908, "dur": 0.596, + "args": { + "External id": 229729,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096594320.214, "dur": 40.281, + "args": { + "External id": 229730,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096594392.686, "dur": 109.814, + "args": { + "External id": 229731,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096594402.843, "dur": 4.075, + "args": { + "External id": 229732,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096594411.818, "dur": 10.310, + "args": { + "External id": 229733,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096594416.195, "dur": 5.496, + "args": { + "External id": 229734,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594419.620, "dur": 0.758, + "args": { + "External id": 229735,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096594428.478, "dur": 28.238, + "args": { + "External id": 229736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594430.127, "dur": 2.470, + "args": { + "External id": 229737,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594433.862, "dur": 0.598, + "args": { + "External id": 229738,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594435.804, "dur": 0.740, + "args": { + "External id": 229739,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594439.245, "dur": 0.778, + "args": { + "External id": 229740,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594441.744, "dur": 0.589, + "args": { + "External id": 229741,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594443.313, "dur": 0.764, + "args": { + "External id": 229742,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594447.380, "dur": 0.807, + "args": { + "External id": 229743,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594449.038, "dur": 0.642, + "args": { + "External id": 229744,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096594451.069, "dur": 1.898, + "args": { + "External id": 229745,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096594471.882, "dur": 22.339, + "args": { + "External id": 229746,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096594556.448, "dur": 344.682, + "args": { + "External id": 229747,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096594585.585, "dur": 310.763, + "args": { + "External id": 229748,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7232, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096594595.103, "dur": 295.093, + "args": { + "External id": 229749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096594926.987, "dur": 2.652, + "args": { + "External id": 229750,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7234, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5327096595035.378, "dur": 17276.122, + "args": { + "External id": 229751,"Record function id": 0, "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595136.838, "dur": 6.644, + "args": { + "External id": 229752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595147.072, "dur": 1.460, + "args": { + "External id": 229753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595150.541, "dur": 1.446, + "args": { + "External id": 229754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595153.814, "dur": 1.293, + "args": { + "External id": 229755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595156.761, "dur": 1.049, + "args": { + "External id": 229756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595159.103, "dur": 0.917, + "args": { + "External id": 229757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595163.651, "dur": 0.988, + "args": { + "External id": 229758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595166.020, "dur": 2.541, + "args": { + "External id": 229759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595170.103, "dur": 1.104, + "args": { + "External id": 229760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096595172.579, "dur": 0.971, + "args": { + "External id": 229761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096595204.696, "dur": 17062.501, + "args": { + "External id": 229762,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096595221.025, "dur": 17038.971, + "args": { + "External id": 229763,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096595236.846, "dur": 13.032, + "args": { + "External id": 229764,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096595253.354, "dur": 16970.812, + "args": { + "External id": 229765,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096595255.871, "dur": 16967.415, + "args": { + "External id": 229766,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096595261.427, "dur": 5.931, + "args": { + "External id": 229767,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096595268.841, "dur": 16951.470, + "args": { + "External id": 229768,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096612451.243, "dur": 36.937, + "args": { + "External id": 229769,"Sequence number": 959163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7253 + } + }, + { + "ph": "s", "id": 13, "pid": 2070552, "tid": 2070552, "ts": 5327096612451.243, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096612473.846, "dur": 9.558, + "args": { + "External id": 229770,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096612477.957, "dur": 5.154, + "args": { + "External id": 229771,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096612552.224, "dur": 130.214, + "args": { + "External id": 229772,"Record function id": 0, "Ev Idx": 7256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096612686.259, "dur": 1075.277, + "args": { + "External id": 229773,"Record function id": 0, "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096612730.245, "dur": 1017.302, + "args": { + "External id": 229774,"Sequence number": 959164, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7258 + } + }, + { + "ph": "s", "id": 12, "pid": 2070552, "tid": 2070552, "ts": 5327096612730.245, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096612805.375, "dur": 45.947, + "args": { + "External id": 229775,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096612865.702, "dur": 101.739, + "args": { + "External id": 229776,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096612991.659, "dur": 43.107, + "args": { + "External id": 229777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096613043.863, "dur": 31.183, + "args": { + "External id": 229778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096613104.964, "dur": 26.293, + "args": { + "External id": 229779,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096613149.087, "dur": 17.829, + "args": { + "External id": 229780,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096613184.274, "dur": 131.135, + "args": { + "External id": 229781,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096613234.872, "dur": 12.785, + "args": { + "External id": 229782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096613240.423, "dur": 6.328, + "args": { + "External id": 229783,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096613250.150, "dur": 4.160, + "args": { + "External id": 229784,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096613255.743, "dur": 2.860, + "args": { + "External id": 229785,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096613260.990, "dur": 3.527, + "args": { + "External id": 229786,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096613325.840, "dur": 43.178, + "args": { + "External id": 229787,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096613398.746, "dur": 28.303, + "args": { + "External id": 229788,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096613436.775, "dur": 39.941, + "args": { + "External id": 229789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096613483.730, "dur": 34.742, + "args": { + "External id": 229790,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096613541.152, "dur": 23.732, + "args": { + "External id": 229791,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096613571.969, "dur": 33.686, + "args": { + "External id": 229792,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096613658.465, "dur": 23.044, + "args": { + "External id": 229793,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5327096613823.474, "dur": 73.448, + "args": { + "External id": 229794,"Record function id": 0, "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096613966.501, "dur": 62.190, + "args": { + "External id": 229795,"Record function id": 0, "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5327096614039.801, "dur": 18369.590, + "args": { + "External id": 229796,"Record function id": 0, "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5327096614048.670, "dur": 825.833, + "args": { + "External id": 229797,"Record function id": 0, "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096614127.926, "dur": 8.689, + "args": { + "External id": 229798,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096614151.197, "dur": 35.599, + "args": { + "External id": 229799,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614156.871, "dur": 2.360, + "args": { + "External id": 229800,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614164.033, "dur": 0.647, + "args": { + "External id": 229801,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614165.876, "dur": 0.749, + "args": { + "External id": 229802,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614168.482, "dur": 0.567, + "args": { + "External id": 229803,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614171.536, "dur": 0.520, + "args": { + "External id": 229804,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614173.162, "dur": 0.794, + "args": { + "External id": 229805,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614174.934, "dur": 2.094, + "args": { + "External id": 229806,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614178.251, "dur": 0.729, + "args": { + "External id": 229807,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614180.003, "dur": 1.032, + "args": { + "External id": 229808,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096614197.748, "dur": 39.606, + "args": { + "External id": 229809,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096614268.523, "dur": 108.558, + "args": { + "External id": 229810,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096614279.232, "dur": 4.255, + "args": { + "External id": 229811,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096614288.616, "dur": 10.495, + "args": { + "External id": 229812,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096614292.917, "dur": 5.753, + "args": { + "External id": 229813,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614296.261, "dur": 1.067, + "args": { + "External id": 229814,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096614305.897, "dur": 28.166, + "args": { + "External id": 229815,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614307.502, "dur": 2.404, + "args": { + "External id": 229816,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614311.440, "dur": 0.448, + "args": { + "External id": 229817,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614312.873, "dur": 0.416, + "args": { + "External id": 229818,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614316.455, "dur": 0.504, + "args": { + "External id": 229819,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614318.054, "dur": 0.496, + "args": { + "External id": 229820,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614319.792, "dur": 0.561, + "args": { + "External id": 229821,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614322.934, "dur": 0.842, + "args": { + "External id": 229822,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614325.160, "dur": 1.103, + "args": { + "External id": 229823,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096614327.384, "dur": 2.443, + "args": { + "External id": 229824,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096614347.879, "dur": 20.981, + "args": { + "External id": 229825,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096614427.559, "dur": 351.403, + "args": { + "External id": 229826,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096614456.097, "dur": 317.754, + "args": { + "External id": 229827,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7311, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096614465.533, "dur": 302.364, + "args": { + "External id": 229828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096614802.303, "dur": 2.609, + "args": { + "External id": 229829,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7313, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5327096614895.111, "dur": 17316.484, + "args": { + "External id": 229830,"Record function id": 0, "Ev Idx": 7314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615010.006, "dur": 7.057, + "args": { + "External id": 229831,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615021.030, "dur": 1.722, + "args": { + "External id": 229832,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615024.601, "dur": 1.148, + "args": { + "External id": 229833,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615027.636, "dur": 1.163, + "args": { + "External id": 229834,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615030.190, "dur": 0.929, + "args": { + "External id": 229835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615032.484, "dur": 0.945, + "args": { + "External id": 229836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615036.731, "dur": 1.199, + "args": { + "External id": 229837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615039.469, "dur": 2.502, + "args": { + "External id": 229838,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615043.545, "dur": 1.146, + "args": { + "External id": 229839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096615046.358, "dur": 1.030, + "args": { + "External id": 229840,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096615069.251, "dur": 17093.068, + "args": { + "External id": 229841,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096615084.894, "dur": 17069.696, + "args": { + "External id": 229842,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096615103.093, "dur": 15.216, + "args": { + "External id": 229843,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096615121.980, "dur": 16996.892, + "args": { + "External id": 229844,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096615124.711, "dur": 16993.468, + "args": { + "External id": 229845,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096615130.412, "dur": 6.419, + "args": { + "External id": 229846,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096615138.745, "dur": 16976.759, + "args": { + "External id": 229847,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096632354.592, "dur": 29.705, + "args": { + "External id": 229848,"Sequence number": 959165, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7332 + } + }, + { + "ph": "s", "id": 11, "pid": 2070552, "tid": 2070552, "ts": 5327096632354.592, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096632370.336, "dur": 9.117, + "args": { + "External id": 229849,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096632373.973, "dur": 5.237, + "args": { + "External id": 229850,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096632447.532, "dur": 81.312, + "args": { + "External id": 229851,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096632530.466, "dur": 1074.148, + "args": { + "External id": 229852,"Record function id": 0, "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096632571.347, "dur": 1020.372, + "args": { + "External id": 229853,"Sequence number": 959166, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7337 + } + }, + { + "ph": "s", "id": 10, "pid": 2070552, "tid": 2070552, "ts": 5327096632571.347, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096632686.663, "dur": 48.503, + "args": { + "External id": 229854,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096632751.230, "dur": 104.645, + "args": { + "External id": 229855,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096632864.789, "dur": 37.356, + "args": { + "External id": 229856,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096632911.267, "dur": 30.505, + "args": { + "External id": 229857,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096632967.826, "dur": 40.049, + "args": { + "External id": 229858,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096633029.106, "dur": 14.967, + "args": { + "External id": 229859,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096633063.372, "dur": 127.465, + "args": { + "External id": 229860,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096633113.053, "dur": 13.229, + "args": { + "External id": 229861,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096633118.350, "dur": 6.883, + "args": { + "External id": 229862,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096633128.998, "dur": 4.363, + "args": { + "External id": 229863,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096633134.467, "dur": 1.106, + "args": { + "External id": 229864,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096633138.500, "dur": 3.612, + "args": { + "External id": 229865,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096633201.170, "dur": 49.647, + "args": { + "External id": 229866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096633281.885, "dur": 28.673, + "args": { + "External id": 229867,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096633318.138, "dur": 40.268, + "args": { + "External id": 229868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096633367.579, "dur": 33.928, + "args": { + "External id": 229869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096633425.105, "dur": 25.439, + "args": { + "External id": 229870,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096633457.350, "dur": 33.216, + "args": { + "External id": 229871,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096633510.143, "dur": 17.861, + "args": { + "External id": 229872,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5327096633720.710, "dur": 76.206, + "args": { + "External id": 229873,"Record function id": 0, "Ev Idx": 7357 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5327096633869.909, "dur": 45.588, + "args": { + "External id": 229874,"Record function id": 0, "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5327096633924.325, "dur": 18472.502, + "args": { + "External id": 229875,"Record function id": 0, "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5327096633931.418, "dur": 847.098, + "args": { + "External id": 229876,"Record function id": 0, "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096634031.626, "dur": 10.142, + "args": { + "External id": 229877,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096634057.449, "dur": 34.630, + "args": { + "External id": 229878,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634062.845, "dur": 2.490, + "args": { + "External id": 229879,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634069.851, "dur": 0.727, + "args": { + "External id": 229880,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634071.583, "dur": 0.584, + "args": { + "External id": 229881,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634073.686, "dur": 0.372, + "args": { + "External id": 229882,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634077.003, "dur": 0.812, + "args": { + "External id": 229883,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634078.879, "dur": 0.748, + "args": { + "External id": 229884,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634080.641, "dur": 2.402, + "args": { + "External id": 229885,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634084.306, "dur": 0.571, + "args": { + "External id": 229886,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634085.557, "dur": 0.565, + "args": { + "External id": 229887,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096634102.580, "dur": 43.593, + "args": { + "External id": 229888,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5327096634199.876, "dur": 112.070, + "args": { + "External id": 229889,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096634212.060, "dur": 5.484, + "args": { + "External id": 229890,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5327096634222.756, "dur": 9.952, + "args": { + "External id": 229891,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096634227.095, "dur": 5.172, + "args": { + "External id": 229892,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634230.088, "dur": 0.720, + "args": { + "External id": 229893,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5327096634239.502, "dur": 25.206, + "args": { + "External id": 229894,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634241.335, "dur": 0.846, + "args": { + "External id": 229895,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634243.694, "dur": 2.257, + "args": { + "External id": 229896,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634247.383, "dur": 0.617, + "args": { + "External id": 229897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634248.671, "dur": 0.577, + "args": { + "External id": 229898,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634252.639, "dur": 0.402, + "args": { + "External id": 229899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634254.045, "dur": 0.305, + "args": { + "External id": 229900,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634255.520, "dur": 0.418, + "args": { + "External id": 229901,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634258.593, "dur": 0.526, + "args": { + "External id": 229902,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096634260.604, "dur": 0.447, + "args": { + "External id": 229903,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096634279.251, "dur": 24.136, + "args": { + "External id": 229904,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5327096634365.652, "dur": 319.684, + "args": { + "External id": 229905,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096634394.354, "dur": 286.038, + "args": { + "External id": 229906,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7390, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5327096634403.845, "dur": 270.290, + "args": { + "External id": 229907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327096634708.907, "dur": 2.821, + "args": { + "External id": 229908,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7392, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5327096634798.415, "dur": 17395.084, + "args": { + "External id": 229909,"Record function id": 0, "Ev Idx": 7393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634896.271, "dur": 6.297, + "args": { + "External id": 229910,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634905.784, "dur": 1.551, + "args": { + "External id": 229911,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634909.155, "dur": 1.055, + "args": { + "External id": 229912,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634911.858, "dur": 0.832, + "args": { + "External id": 229913,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634914.273, "dur": 1.080, + "args": { + "External id": 229914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634916.568, "dur": 1.269, + "args": { + "External id": 229915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634921.243, "dur": 0.974, + "args": { + "External id": 229916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634923.399, "dur": 1.964, + "args": { + "External id": 229917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634926.807, "dur": 0.913, + "args": { + "External id": 229918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096634928.894, "dur": 0.947, + "args": { + "External id": 229919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096634949.225, "dur": 17200.521, + "args": { + "External id": 229920,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096634963.621, "dur": 17178.189, + "args": { + "External id": 229921,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096634998.853, "dur": 15.387, + "args": { + "External id": 229922,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096635018.096, "dur": 17088.569, + "args": { + "External id": 229923,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096635020.441, "dur": 17085.572, + "args": { + "External id": 229924,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096635026.760, "dur": 6.449, + "args": { + "External id": 229925,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096635034.800, "dur": 17067.707, + "args": { + "External id": 229926,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096652332.161, "dur": 39.921, + "args": { + "External id": 229927,"Sequence number": 959167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7411 + } + }, + { + "ph": "s", "id": 9, "pid": 2070552, "tid": 2070552, "ts": 5327096652332.161, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327096652358.121, "dur": 9.208, + "args": { + "External id": 229928,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096652361.702, "dur": 5.423, + "args": { + "External id": 229929,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096652434.925, "dur": 82.837, + "args": { + "External id": 229930,"Record function id": 0, "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5327096652519.279, "dur": 1070.951, + "args": { + "External id": 229931,"Record function id": 0, "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096652562.131, "dur": 1014.674, + "args": { + "External id": 229932,"Sequence number": 959168, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7416 + } + }, + { + "ph": "s", "id": 8, "pid": 2070552, "tid": 2070552, "ts": 5327096652562.131, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096652660.953, "dur": 47.375, + "args": { + "External id": 229933,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096652723.734, "dur": 103.759, + "args": { + "External id": 229934,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096652839.308, "dur": 39.441, + "args": { + "External id": 229935,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096652886.021, "dur": 30.908, + "args": { + "External id": 229936,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096652950.250, "dur": 25.025, + "args": { + "External id": 229937,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096653011.156, "dur": 18.849, + "args": { + "External id": 229938,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096653049.097, "dur": 131.795, + "args": { + "External id": 229939,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096653099.260, "dur": 12.358, + "args": { + "External id": 229940,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096653104.371, "dur": 6.487, + "args": { + "External id": 229941,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096653114.131, "dur": 5.191, + "args": { + "External id": 229942,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096653120.672, "dur": 1.309, + "args": { + "External id": 229943,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096653126.208, "dur": 3.819, + "args": { + "External id": 229944,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096653191.067, "dur": 49.115, + "args": { + "External id": 229945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5327096653272.012, "dur": 29.296, + "args": { + "External id": 229946,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096653310.093, "dur": 40.413, + "args": { + "External id": 229947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096653357.510, "dur": 35.100, + "args": { + "External id": 229948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096653415.192, "dur": 25.212, + "args": { + "External id": 229949,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096653446.045, "dur": 33.112, + "args": { + "External id": 229950,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5327096653496.340, "dur": 15.861, + "args": { + "External id": 229951,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5327096653691.834, "dur": 36.128, + "args": { + "External id": 229952,"Record function id": 0, "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5327096653796.263, "dur": 33.974, + "args": { + "External id": 229953,"Record function id": 0, "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2070552, "tid": 2070552, + "ts": 5327096653831.559, "dur": 207.244, + "args": { + "External id": 229954,"Record function id": 0, "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096653866.794, "dur": 163.713, + "args": { + "External id": 229955,"Sequence number": 959169, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [8388608, 2048, 1]], "Input Dims": [[2048], [16, 4096, 2048]], "Ev Idx": 7439 + } + }, + { + "ph": "s", "id": 7, "pid": 2070552, "tid": 2070552, "ts": 5327096653866.794, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5327096653933.179, "dur": 42.650, + "args": { + "External id": 229956,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096654102.966, "dur": 0.891, + "args": { + "External id": 229957,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096654112.581, "dur": 13.348, + "args": { + "External id": 229958,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654119.598, "dur": 3.111, + "args": { + "External id": 229959,"Record function id": 0, "Concrete Inputs": ["", "[16, 8191]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096654130.550, "dur": 3.098, + "args": { + "External id": 229960,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654132.044, "dur": 0.869, + "args": { + "External id": 229961,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096654134.785, "dur": 3.334, + "args": { + "External id": 229962,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654136.598, "dur": 0.579, + "args": { + "External id": 229963,"Record function id": 0, "Concrete Inputs": ["", "[16, 1]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::full_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096654145.659, "dur": 44.686, + "args": { + "External id": 229964,"Record function id": 0, "Concrete Inputs": ["", "-100", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096654147.612, "dur": 12.789, + "args": { + "External id": 229965,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["long int", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654153.584, "dur": 6.269, + "args": { + "External id": 229966,"Record function id": 0, "Concrete Inputs": ["[16, 1]", "[1, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654162.544, "dur": 27.389, + "args": { + "External id": 229967,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1, 1], []], "Input Dims": [[16, 1], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070552, "tid": 2070552, + "ts": 5327096654199.365, "dur": 28.598, + "args": { + "External id": 229968,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8192, 1], [1, 1]], []], "Input Dims": [[[16, 8191], [16, 1]], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096654235.357, "dur": 3.789, + "args": { + "External id": 229969,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654237.457, "dur": 0.912, + "args": { + "External id": 229970,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070552, "tid": 2070552, + "ts": 5327096654243.911, "dur": 35.178, + "args": { + "External id": 229971,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070552, "tid": 2070552, + "ts": 5327096654245.798, "dur": 33.087, + "args": { + "External id": 229972,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096654247.963, "dur": 7.407, + "args": { + "External id": 229973,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096654251.924, "dur": 2.932, + "args": { + "External id": 229974,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654256.704, "dur": 21.765, + "args": { + "External id": 229975,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 7459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096654304.332, "dur": 7.216, + "args": { + "External id": 229976,"Sequence number": 959170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7460 + } + }, + { + "ph": "s", "id": 6, "pid": 2070552, "tid": 2070552, "ts": 5327096654304.332, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096654314.022, "dur": 1.313, + "args": { + "External id": 229977,"Sequence number": 959171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096654343.631, "dur": 18506.340, + "args": { + "External id": 229978,"Sequence number": 959171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 7462 + } + }, + { + "ph": "s", "id": 5, "pid": 2070552, "tid": 2070552, "ts": 5327096654343.631, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096654374.194, "dur": 31.807, + "args": { + "External id": 229979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096654375.565, "dur": 8.912, + "args": { + "External id": 229980,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654377.483, "dur": 6.599, + "args": { + "External id": 229981,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654386.555, "dur": 18.989, + "args": { + "External id": 229982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654387.703, "dur": 17.394, + "args": { + "External id": 229983,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096654410.202, "dur": 22.057, + "args": { + "External id": 229984,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096654410.896, "dur": 4.836, + "args": { + "External id": 229985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654412.441, "dur": 3.033, + "args": { + "External id": 229986,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654416.432, "dur": 15.611, + "args": { + "External id": 229987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654418.749, "dur": 12.942, + "args": { + "External id": 229988,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070552, "tid": 2070552, + "ts": 5327096654438.700, "dur": 17.589, + "args": { + "External id": 229989,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096654440.639, "dur": 3.104, + "args": { + "External id": 229990,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654444.381, "dur": 11.630, + "args": { + "External id": 229991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654445.238, "dur": 10.480, + "args": { + "External id": 229992,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2070552, + "ts": 5327096654463.214, "dur": 28.173, + "args": { + "External id": 229993,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096654495.956, "dur": 56.490, + "args": { + "External id": 229994,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096654498.982, "dur": 53.022, + "args": { + "External id": 229995,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654506.422, "dur": 1.101, + "args": { + "External id": 229996,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096654508.867, "dur": 25.545, + "args": { + "External id": 229997,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096654512.522, "dur": 21.680, + "args": { + "External id": 229998,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096654515.041, "dur": 2.893, + "args": { + "External id": 229999,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096654519.283, "dur": 14.537, + "args": { + "External id": 230000,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2070552, + "ts": 5327096654556.672, "dur": 12588.797, + "args": { + "External id": 230001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2070552, + "ts": 5327096654558.280, "dur": 12586.263, + "args": { + "External id": 230002,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096667155.925, "dur": 6.695, + "args": { + "External id": 230003,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667159.791, "dur": 0.931, + "args": { + "External id": 230004,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096667168.911, "dur": 108.024, + "args": { + "External id": 230005,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096667172.876, "dur": 7.507, + "args": { + "External id": 230006,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096667175.901, "dur": 3.318, + "args": { + "External id": 230007,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667178.152, "dur": 0.737, + "args": { + "External id": 230008,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096667182.324, "dur": 93.903, + "args": { + "External id": 230009,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096667185.042, "dur": 90.235, + "args": { + "External id": 230010,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096667280.572, "dur": 4.414, + "args": { + "External id": 230011,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667282.912, "dur": 0.709, + "args": { + "External id": 230012,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096667294.715, "dur": 2.652, + "args": { + "External id": 230013,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096667306.575, "dur": 6.356, + "args": { + "External id": 230014,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096667308.650, "dur": 4.015, + "args": { + "External id": 230015,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096667463.415, "dur": 265.991, + "args": { + "External id": 230016,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096667468.209, "dur": 2.131, + "args": { + "External id": 230017,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096667472.217, "dur": 256.443, + "args": { + "External id": 230018,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096667476.819, "dur": 0.471, + "args": { + "External id": 230019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096667479.880, "dur": 25.058, + "args": { + "External id": 230020,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096667507.332, "dur": 6.042, + "args": { + "External id": 230021,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667512.207, "dur": 0.853, + "args": { + "External id": 230022,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096667515.440, "dur": 26.345, + "args": { + "External id": 230023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096667516.346, "dur": 1.574, + "args": { + "External id": 230024,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096667519.962, "dur": 21.488, + "args": { + "External id": 230025,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096667524.021, "dur": 3.744, + "args": { + "External id": 230026,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096667544.182, "dur": 23.371, + "args": { + "External id": 230027,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096667570.268, "dur": 18.335, + "args": { + "External id": 230028,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096667594.803, "dur": 16.339, + "args": { + "External id": 230029,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096667613.480, "dur": 49.446, + "args": { + "External id": 230030,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096667666.579, "dur": 24.692, + "args": { + "External id": 230031,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096667669.052, "dur": 1.994, + "args": { + "External id": 230032,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667673.560, "dur": 0.995, + "args": { + "External id": 230033,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096667693.667, "dur": 13.267, + "args": { + "External id": 230034,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096667711.346, "dur": 16.005, + "args": { + "External id": 230035,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096667738.515, "dur": 2.388, + "args": { + "External id": 230036,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096667748.162, "dur": 5.087, + "args": { + "External id": 230037,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667750.988, "dur": 0.986, + "args": { + "External id": 230038,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096667834.560, "dur": 69.503, + "args": { + "External id": 230039,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096667909.637, "dur": 5.341, + "args": { + "External id": 230040,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667912.428, "dur": 1.384, + "args": { + "External id": 230041,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096667916.883, "dur": 28.957, + "args": { + "External id": 230042,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096667954.415, "dur": 6.714, + "args": { + "External id": 230043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096667956.480, "dur": 3.815, + "args": { + "External id": 230044,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096667958.699, "dur": 1.361, + "args": { + "External id": 230045,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096667964.517, "dur": 65.339, + "args": { + "External id": 230046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096667965.877, "dur": 63.014, + "args": { + "External id": 230047,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668036.813, "dur": 17.649, + "args": { + "External id": 230048,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096668061.263, "dur": 7.677, + "args": { + "External id": 230049,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668066.517, "dur": 0.972, + "args": { + "External id": 230050,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096668073.920, "dur": 52.028, + "args": { + "External id": 230051,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096668074.991, "dur": 4.294, + "args": { + "External id": 230052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096668076.031, "dur": 2.656, + "args": { + "External id": 230053,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668077.821, "dur": 0.724, + "args": { + "External id": 230054,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096668079.831, "dur": 45.784, + "args": { + "External id": 230055,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096668082.668, "dur": 42.410, + "args": { + "External id": 230056,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096668130.294, "dur": 4.001, + "args": { + "External id": 230057,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668132.471, "dur": 0.740, + "args": { + "External id": 230058,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096668140.883, "dur": 1.887, + "args": { + "External id": 230059,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096668150.856, "dur": 10.588, + "args": { + "External id": 230060,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096668155.438, "dur": 5.674, + "args": { + "External id": 230061,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096668261.300, "dur": 184.339, + "args": { + "External id": 230062,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096668263.367, "dur": 2.064, + "args": { + "External id": 230063,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096668267.263, "dur": 177.880, + "args": { + "External id": 230064,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096668269.382, "dur": 0.484, + "args": { + "External id": 230065,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096668271.242, "dur": 24.026, + "args": { + "External id": 230066,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096668297.046, "dur": 4.071, + "args": { + "External id": 230067,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668299.561, "dur": 1.036, + "args": { + "External id": 230068,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096668302.294, "dur": 26.037, + "args": { + "External id": 230069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096668303.253, "dur": 1.484, + "args": { + "External id": 230070,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096668308.351, "dur": 19.613, + "args": { + "External id": 230071,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668313.387, "dur": 2.871, + "args": { + "External id": 230072,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096668330.211, "dur": 20.054, + "args": { + "External id": 230073,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668352.000, "dur": 13.150, + "args": { + "External id": 230074,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096668368.378, "dur": 13.091, + "args": { + "External id": 230075,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668382.736, "dur": 13.791, + "args": { + "External id": 230076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096668398.270, "dur": 21.599, + "args": { + "External id": 230077,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668400.189, "dur": 1.276, + "args": { + "External id": 230078,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668405.719, "dur": 0.875, + "args": { + "External id": 230079,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668421.266, "dur": 11.178, + "args": { + "External id": 230080,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668433.630, "dur": 10.492, + "args": { + "External id": 230081,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096668452.297, "dur": 3.037, + "args": { + "External id": 230082,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096668464.617, "dur": 4.194, + "args": { + "External id": 230083,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668467.185, "dur": 0.512, + "args": { + "External id": 230084,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096668540.773, "dur": 50.261, + "args": { + "External id": 230085,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096668596.107, "dur": 7.149, + "args": { + "External id": 230086,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668600.809, "dur": 1.302, + "args": { + "External id": 230087,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668605.190, "dur": 63.642, + "args": { + "External id": 230088,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096668676.618, "dur": 6.080, + "args": { + "External id": 230089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096668678.318, "dur": 3.499, + "args": { + "External id": 230090,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668680.443, "dur": 1.136, + "args": { + "External id": 230091,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096668686.324, "dur": 50.257, + "args": { + "External id": 230092,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096668689.361, "dur": 46.591, + "args": { + "External id": 230093,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096668741.234, "dur": 15.949, + "args": { + "External id": 230094,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096668764.034, "dur": 4.372, + "args": { + "External id": 230095,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668766.160, "dur": 0.842, + "args": { + "External id": 230096,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096668772.773, "dur": 52.033, + "args": { + "External id": 230097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096668774.057, "dur": 8.313, + "args": { + "External id": 230098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096668774.897, "dur": 6.897, + "args": { + "External id": 230099,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668778.707, "dur": 2.786, + "args": { + "External id": 230100,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096668783.152, "dur": 41.008, + "args": { + "External id": 230101,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096668784.134, "dur": 39.514, + "args": { + "External id": 230102,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096668829.556, "dur": 4.313, + "args": { + "External id": 230103,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096668831.732, "dur": 1.075, + "args": { + "External id": 230104,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096668840.378, "dur": 1.513, + "args": { + "External id": 230105,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096668850.281, "dur": 8.379, + "args": { + "External id": 230106,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096668854.335, "dur": 4.042, + "args": { + "External id": 230107,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096668948.169, "dur": 309.160, + "args": { + "External id": 230108,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096668950.951, "dur": 1.823, + "args": { + "External id": 230109,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096668954.388, "dur": 302.412, + "args": { + "External id": 230110,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096668955.959, "dur": 0.272, + "args": { + "External id": 230111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096668959.878, "dur": 38.941, + "args": { + "External id": 230112,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096669001.701, "dur": 3.907, + "args": { + "External id": 230113,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669004.155, "dur": 1.164, + "args": { + "External id": 230114,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096669006.625, "dur": 39.240, + "args": { + "External id": 230115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096669009.984, "dur": 3.524, + "args": { + "External id": 230116,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096669014.918, "dur": 30.585, + "args": { + "External id": 230117,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669018.424, "dur": 2.454, + "args": { + "External id": 230118,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096669047.413, "dur": 36.546, + "args": { + "External id": 230119,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669085.662, "dur": 43.496, + "args": { + "External id": 230120,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096669131.924, "dur": 33.447, + "args": { + "External id": 230121,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669166.809, "dur": 29.819, + "args": { + "External id": 230122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096669198.548, "dur": 25.943, + "args": { + "External id": 230123,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669200.287, "dur": 1.113, + "args": { + "External id": 230124,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669205.295, "dur": 0.907, + "args": { + "External id": 230125,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669225.980, "dur": 13.283, + "args": { + "External id": 230126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669240.460, "dur": 15.310, + "args": { + "External id": 230127,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096669265.297, "dur": 2.299, + "args": { + "External id": 230128,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096669276.642, "dur": 3.783, + "args": { + "External id": 230129,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669278.961, "dur": 0.595, + "args": { + "External id": 230130,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096669351.757, "dur": 53.455, + "args": { + "External id": 230131,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096669412.173, "dur": 4.475, + "args": { + "External id": 230132,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669414.761, "dur": 0.920, + "args": { + "External id": 230133,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669417.991, "dur": 23.880, + "args": { + "External id": 230134,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096669446.830, "dur": 7.009, + "args": { + "External id": 230135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096669448.240, "dur": 4.892, + "args": { + "External id": 230136,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669450.160, "dur": 2.742, + "args": { + "External id": 230137,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096669458.772, "dur": 39.631, + "args": { + "External id": 230138,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096669459.795, "dur": 38.057, + "args": { + "External id": 230139,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669502.514, "dur": 14.603, + "args": { + "External id": 230140,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096669522.261, "dur": 4.069, + "args": { + "External id": 230141,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669524.381, "dur": 0.827, + "args": { + "External id": 230142,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096669532.766, "dur": 47.299, + "args": { + "External id": 230143,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096669533.689, "dur": 5.365, + "args": { + "External id": 230144,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096669536.214, "dur": 2.238, + "args": { + "External id": 230145,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669537.737, "dur": 0.568, + "args": { + "External id": 230146,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096669539.541, "dur": 40.122, + "args": { + "External id": 230147,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096669540.103, "dur": 39.055, + "args": { + "External id": 230148,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096669583.985, "dur": 3.625, + "args": { + "External id": 230149,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669585.743, "dur": 0.728, + "args": { + "External id": 230150,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096669595.544, "dur": 1.339, + "args": { + "External id": 230151,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096669604.141, "dur": 7.128, + "args": { + "External id": 230152,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096669605.930, "dur": 5.059, + "args": { + "External id": 230153,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096669741.187, "dur": 181.090, + "args": { + "External id": 230154,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096669743.700, "dur": 3.202, + "args": { + "External id": 230155,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096669748.361, "dur": 173.286, + "args": { + "External id": 230156,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096669749.990, "dur": 0.326, + "args": { + "External id": 230157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096669753.620, "dur": 23.879, + "args": { + "External id": 230158,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096669779.115, "dur": 3.988, + "args": { + "External id": 230159,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669781.654, "dur": 1.162, + "args": { + "External id": 230160,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096669783.908, "dur": 22.400, + "args": { + "External id": 230161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096669784.845, "dur": 1.351, + "args": { + "External id": 230162,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096669787.635, "dur": 18.373, + "args": { + "External id": 230163,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669792.295, "dur": 2.642, + "args": { + "External id": 230164,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096669807.524, "dur": 19.770, + "args": { + "External id": 230165,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669828.868, "dur": 12.407, + "args": { + "External id": 230166,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096669843.786, "dur": 14.138, + "args": { + "External id": 230167,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669861.552, "dur": 13.109, + "args": { + "External id": 230168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096669876.474, "dur": 19.292, + "args": { + "External id": 230169,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669878.092, "dur": 1.074, + "args": { + "External id": 230170,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669880.907, "dur": 0.852, + "args": { + "External id": 230171,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669897.009, "dur": 12.226, + "args": { + "External id": 230172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096669910.229, "dur": 10.634, + "args": { + "External id": 230173,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096669930.414, "dur": 2.345, + "args": { + "External id": 230174,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096669941.454, "dur": 3.392, + "args": { + "External id": 230175,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096669943.620, "dur": 0.387, + "args": { + "External id": 230176,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096670025.094, "dur": 55.359, + "args": { + "External id": 230177,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096670085.794, "dur": 5.315, + "args": { + "External id": 230178,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670089.092, "dur": 0.757, + "args": { + "External id": 230179,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670092.484, "dur": 22.731, + "args": { + "External id": 230180,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096670120.145, "dur": 7.713, + "args": { + "External id": 230181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096670124.095, "dur": 3.072, + "args": { + "External id": 230182,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670125.910, "dur": 1.042, + "args": { + "External id": 230183,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096670130.866, "dur": 37.945, + "args": { + "External id": 230184,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096670132.097, "dur": 36.173, + "args": { + "External id": 230185,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670172.897, "dur": 16.292, + "args": { + "External id": 230186,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096670196.568, "dur": 6.184, + "args": { + "External id": 230187,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670200.870, "dur": 0.711, + "args": { + "External id": 230188,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096670206.663, "dur": 44.748, + "args": { + "External id": 230189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096670207.412, "dur": 3.776, + "args": { + "External id": 230190,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096670208.117, "dur": 2.552, + "args": { + "External id": 230191,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670209.561, "dur": 0.969, + "args": { + "External id": 230192,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096670211.814, "dur": 39.214, + "args": { + "External id": 230193,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096670212.442, "dur": 38.092, + "args": { + "External id": 230194,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096670255.851, "dur": 6.581, + "args": { + "External id": 230195,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670258.840, "dur": 2.438, + "args": { + "External id": 230196,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096670271.821, "dur": 1.642, + "args": { + "External id": 230197,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096670280.718, "dur": 5.030, + "args": { + "External id": 230198,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096670282.249, "dur": 3.251, + "args": { + "External id": 230199,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096670363.165, "dur": 175.404, + "args": { + "External id": 230200,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096670365.233, "dur": 1.975, + "args": { + "External id": 230201,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096670371.081, "dur": 167.166, + "args": { + "External id": 230202,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096670372.324, "dur": 0.247, + "args": { + "External id": 230203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096670373.831, "dur": 20.161, + "args": { + "External id": 230204,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096670395.795, "dur": 2.850, + "args": { + "External id": 230205,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670397.586, "dur": 0.832, + "args": { + "External id": 230206,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096670399.404, "dur": 19.844, + "args": { + "External id": 230207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096670400.652, "dur": 1.262, + "args": { + "External id": 230208,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096670402.823, "dur": 16.138, + "args": { + "External id": 230209,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670406.668, "dur": 2.360, + "args": { + "External id": 230210,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096670420.666, "dur": 19.681, + "args": { + "External id": 230211,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670442.286, "dur": 13.161, + "args": { + "External id": 230212,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096670459.563, "dur": 13.567, + "args": { + "External id": 230213,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670474.591, "dur": 13.205, + "args": { + "External id": 230214,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096670489.334, "dur": 19.514, + "args": { + "External id": 230215,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670491.236, "dur": 1.148, + "args": { + "External id": 230216,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670494.257, "dur": 0.623, + "args": { + "External id": 230217,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670510.236, "dur": 12.616, + "args": { + "External id": 230218,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670526.072, "dur": 11.381, + "args": { + "External id": 230219,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096670544.264, "dur": 1.532, + "args": { + "External id": 230220,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096670553.859, "dur": 3.434, + "args": { + "External id": 230221,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670556.076, "dur": 0.377, + "args": { + "External id": 230222,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096670661.325, "dur": 56.387, + "args": { + "External id": 230223,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096670723.699, "dur": 6.384, + "args": { + "External id": 230224,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670727.230, "dur": 1.395, + "args": { + "External id": 230225,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670731.296, "dur": 24.007, + "args": { + "External id": 230226,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096670761.899, "dur": 5.280, + "args": { + "External id": 230227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096670763.361, "dur": 3.133, + "args": { + "External id": 230228,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670765.312, "dur": 0.978, + "args": { + "External id": 230229,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096670770.427, "dur": 38.430, + "args": { + "External id": 230230,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096670771.536, "dur": 36.851, + "args": { + "External id": 230231,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096670812.966, "dur": 13.655, + "args": { + "External id": 230232,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096670832.305, "dur": 6.268, + "args": { + "External id": 230233,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670836.690, "dur": 0.823, + "args": { + "External id": 230234,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096670842.311, "dur": 49.779, + "args": { + "External id": 230235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096670843.111, "dur": 5.245, + "args": { + "External id": 230236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096670843.847, "dur": 3.994, + "args": { + "External id": 230237,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670847.146, "dur": 0.583, + "args": { + "External id": 230238,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096670849.029, "dur": 42.733, + "args": { + "External id": 230239,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096670851.657, "dur": 39.662, + "args": { + "External id": 230240,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096670895.930, "dur": 4.090, + "args": { + "External id": 230241,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096670897.896, "dur": 1.013, + "args": { + "External id": 230242,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096670906.421, "dur": 1.451, + "args": { + "External id": 230243,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096670914.937, "dur": 7.028, + "args": { + "External id": 230244,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096670918.642, "dur": 3.070, + "args": { + "External id": 230245,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096671021.710, "dur": 169.441, + "args": { + "External id": 230246,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096671024.137, "dur": 3.286, + "args": { + "External id": 230247,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096671029.090, "dur": 161.696, + "args": { + "External id": 230248,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096671032.356, "dur": 0.386, + "args": { + "External id": 230249,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096671033.870, "dur": 20.869, + "args": { + "External id": 230250,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096671056.423, "dur": 3.228, + "args": { + "External id": 230251,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671058.520, "dur": 0.803, + "args": { + "External id": 230252,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096671060.574, "dur": 24.441, + "args": { + "External id": 230253,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096671063.686, "dur": 1.199, + "args": { + "External id": 230254,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096671067.997, "dur": 16.707, + "args": { + "External id": 230255,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671070.728, "dur": 2.891, + "args": { + "External id": 230256,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096671086.582, "dur": 17.725, + "args": { + "External id": 230257,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671106.028, "dur": 12.071, + "args": { + "External id": 230258,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096671120.369, "dur": 11.670, + "args": { + "External id": 230259,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671133.273, "dur": 11.078, + "args": { + "External id": 230260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096671146.158, "dur": 20.001, + "args": { + "External id": 230261,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671148.295, "dur": 1.172, + "args": { + "External id": 230262,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671153.484, "dur": 0.693, + "args": { + "External id": 230263,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671167.348, "dur": 10.709, + "args": { + "External id": 230264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671179.176, "dur": 10.586, + "args": { + "External id": 230265,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096671197.369, "dur": 1.999, + "args": { + "External id": 230266,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096671208.430, "dur": 3.571, + "args": { + "External id": 230267,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671210.674, "dur": 0.516, + "args": { + "External id": 230268,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096671276.017, "dur": 48.789, + "args": { + "External id": 230269,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096671329.752, "dur": 8.188, + "args": { + "External id": 230270,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671334.357, "dur": 2.466, + "args": { + "External id": 230271,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671339.075, "dur": 21.737, + "args": { + "External id": 230272,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096671364.786, "dur": 5.468, + "args": { + "External id": 230273,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096671366.387, "dur": 2.987, + "args": { + "External id": 230274,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671368.413, "dur": 0.758, + "args": { + "External id": 230275,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096671372.555, "dur": 39.601, + "args": { + "External id": 230276,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096671375.129, "dur": 36.484, + "args": { + "External id": 230277,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671416.004, "dur": 12.865, + "args": { + "External id": 230278,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096671433.865, "dur": 4.106, + "args": { + "External id": 230279,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671436.156, "dur": 0.773, + "args": { + "External id": 230280,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096671441.429, "dur": 47.116, + "args": { + "External id": 230281,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096671442.227, "dur": 5.680, + "args": { + "External id": 230282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096671442.966, "dur": 4.445, + "args": { + "External id": 230283,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671446.122, "dur": 1.163, + "args": { + "External id": 230284,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096671448.549, "dur": 39.575, + "args": { + "External id": 230285,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096671448.967, "dur": 38.604, + "args": { + "External id": 230286,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096671492.542, "dur": 3.686, + "args": { + "External id": 230287,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671494.540, "dur": 0.595, + "args": { + "External id": 230288,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096671501.682, "dur": 1.327, + "args": { + "External id": 230289,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096671509.482, "dur": 6.472, + "args": { + "External id": 230290,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096671512.894, "dur": 2.792, + "args": { + "External id": 230291,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096671590.846, "dur": 221.855, + "args": { + "External id": 230292,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096671592.662, "dur": 3.452, + "args": { + "External id": 230293,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096671597.405, "dur": 214.787, + "args": { + "External id": 230294,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096671598.555, "dur": 0.326, + "args": { + "External id": 230295,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096671599.768, "dur": 20.288, + "args": { + "External id": 230296,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096671621.476, "dur": 43.341, + "args": { + "External id": 230297,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671663.256, "dur": 0.969, + "args": { + "External id": 230298,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096671666.030, "dur": 27.412, + "args": { + "External id": 230299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096671669.493, "dur": 2.378, + "args": { + "External id": 230300,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096671673.285, "dur": 19.867, + "args": { + "External id": 230301,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671675.877, "dur": 3.073, + "args": { + "External id": 230302,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096671694.990, "dur": 19.530, + "args": { + "External id": 230303,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671716.249, "dur": 12.126, + "args": { + "External id": 230304,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096671730.943, "dur": 13.766, + "args": { + "External id": 230305,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671745.802, "dur": 13.028, + "args": { + "External id": 230306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096671760.597, "dur": 24.336, + "args": { + "External id": 230307,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671764.410, "dur": 0.885, + "args": { + "External id": 230308,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671769.401, "dur": 0.760, + "args": { + "External id": 230309,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671786.399, "dur": 12.424, + "args": { + "External id": 230310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671799.824, "dur": 11.398, + "args": { + "External id": 230311,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096671820.506, "dur": 2.304, + "args": { + "External id": 230312,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096671832.559, "dur": 3.881, + "args": { + "External id": 230313,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671834.934, "dur": 0.463, + "args": { + "External id": 230314,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096671903.860, "dur": 57.857, + "args": { + "External id": 230315,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096671968.873, "dur": 5.053, + "args": { + "External id": 230316,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096671971.823, "dur": 1.004, + "args": { + "External id": 230317,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096671975.295, "dur": 44.583, + "args": { + "External id": 230318,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096672026.581, "dur": 6.407, + "args": { + "External id": 230319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096672028.183, "dur": 3.867, + "args": { + "External id": 230320,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672030.229, "dur": 1.613, + "args": { + "External id": 230321,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096672038.779, "dur": 49.470, + "args": { + "External id": 230322,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096672040.106, "dur": 47.523, + "args": { + "External id": 230323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672092.622, "dur": 15.741, + "args": { + "External id": 230324,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096672114.567, "dur": 4.583, + "args": { + "External id": 230325,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672117.134, "dur": 0.716, + "args": { + "External id": 230326,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096672123.380, "dur": 48.582, + "args": { + "External id": 230327,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096672124.245, "dur": 5.496, + "args": { + "External id": 230328,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096672127.140, "dur": 2.043, + "args": { + "External id": 230329,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672128.391, "dur": 0.649, + "args": { + "External id": 230330,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096672130.346, "dur": 41.201, + "args": { + "External id": 230331,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096672130.815, "dur": 40.034, + "args": { + "External id": 230332,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096672175.503, "dur": 3.940, + "args": { + "External id": 230333,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672177.441, "dur": 0.859, + "args": { + "External id": 230334,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096672187.401, "dur": 1.622, + "args": { + "External id": 230335,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096672196.854, "dur": 5.264, + "args": { + "External id": 230336,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096672198.564, "dur": 3.279, + "args": { + "External id": 230337,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096672282.607, "dur": 181.856, + "args": { + "External id": 230338,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096672286.253, "dur": 2.040, + "args": { + "External id": 230339,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096672289.681, "dur": 174.183, + "args": { + "External id": 230340,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096672290.642, "dur": 0.454, + "args": { + "External id": 230341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096672294.646, "dur": 22.944, + "args": { + "External id": 230342,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096672319.295, "dur": 4.649, + "args": { + "External id": 230343,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672322.925, "dur": 0.795, + "args": { + "External id": 230344,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096672324.847, "dur": 21.041, + "args": { + "External id": 230345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096672326.085, "dur": 1.359, + "args": { + "External id": 230346,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096672328.618, "dur": 16.984, + "args": { + "External id": 230347,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672330.781, "dur": 3.306, + "args": { + "External id": 230348,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096672347.217, "dur": 17.415, + "args": { + "External id": 230349,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672365.881, "dur": 15.890, + "args": { + "External id": 230350,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096672384.249, "dur": 13.660, + "args": { + "External id": 230351,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672401.328, "dur": 13.717, + "args": { + "External id": 230352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096672416.980, "dur": 19.906, + "args": { + "External id": 230353,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672418.627, "dur": 0.951, + "args": { + "External id": 230354,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672421.372, "dur": 0.671, + "args": { + "External id": 230355,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672438.159, "dur": 13.046, + "args": { + "External id": 230356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672452.110, "dur": 10.746, + "args": { + "External id": 230357,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096672472.146, "dur": 1.686, + "args": { + "External id": 230358,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096672481.843, "dur": 3.665, + "args": { + "External id": 230359,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672484.142, "dur": 0.440, + "args": { + "External id": 230360,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096672546.046, "dur": 44.359, + "args": { + "External id": 230361,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096672594.917, "dur": 4.589, + "args": { + "External id": 230362,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672597.572, "dur": 0.897, + "args": { + "External id": 230363,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672600.871, "dur": 20.955, + "args": { + "External id": 230364,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096672665.276, "dur": 8.988, + "args": { + "External id": 230365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096672669.523, "dur": 3.910, + "args": { + "External id": 230366,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672671.715, "dur": 1.450, + "args": { + "External id": 230367,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096672677.305, "dur": 45.017, + "args": { + "External id": 230368,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096672678.556, "dur": 43.119, + "args": { + "External id": 230369,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672726.559, "dur": 14.677, + "args": { + "External id": 230370,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096672746.429, "dur": 25.699, + "args": { + "External id": 230371,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096672748.613, "dur": 23.152, + "args": { + "External id": 230372,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672753.499, "dur": 2.017, + "args": { + "External id": 230373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327096672778.868, "dur": 30.340, + "args": { + "External id": 230374,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5327096672782.393, "dur": 26.599, + "args": { + "External id": 230375,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096672786.854, "dur": 3.471, + "args": { + "External id": 230376,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096672791.427, "dur": 17.136, + "args": { + "External id": 230377,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2070552, + "ts": 5327096672822.515, "dur": 4.942, + "args": { + "External id": 230378,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2070552, + "ts": 5327096672824.365, "dur": 2.838, + "args": { + "External id": 230379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2070552, + "ts": 5327096672828.494, "dur": 0.914, + "args": { + "External id": 230380,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2070552, + "ts": 5327096672828.898, "dur": 0.436, + "args": { + "External id": 230381,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096672878.687, "dur": 3.344, + "args": { + "External id": 230382,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32000]", "5", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096673044.365, "dur": 8.501, + "args": { + "External id": 230383,"Sequence number": 959172, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7867 + } + }, + { + "ph": "s", "id": 4, "pid": 2070552, "tid": 2070552, "ts": 5327096673044.365, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096673055.516, "dur": 1.041, + "args": { + "External id": 230384,"Sequence number": 959173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[131072000, 32000, 1], []], "Input Dims": [[16, 4096, 32000], []], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunction", "pid": 2070552, "tid": 2070552, + "ts": 5327096673089.718, "dur": 8235.233, + "args": { + "External id": 230385,"Sequence number": 959173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "1.", "8"], "Input type": ["c10::BFloat16", "c10::Half", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [32000, 1], [2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 32000], [32000, 2048], [], [], [], []], "Ev Idx": 7869 + } + }, + { + "ph": "s", "id": 3, "pid": 2070552, "tid": 2070552, "ts": 5327096673089.718, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096673104.242, "dur": 39.104, + "args": { + "External id": 230386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096673106.147, "dur": 11.024, + "args": { + "External id": 230387,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673108.694, "dur": 7.925, + "args": { + "External id": 230388,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673121.253, "dur": 21.633, + "args": { + "External id": 230389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673123.463, "dur": 18.923, + "args": { + "External id": 230390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096673146.728, "dur": 22.955, + "args": { + "External id": 230391,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096673147.524, "dur": 3.662, + "args": { + "External id": 230392,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673148.908, "dur": 1.989, + "args": { + "External id": 230393,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673154.992, "dur": 14.509, + "args": { + "External id": 230394,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673155.664, "dur": 13.325, + "args": { + "External id": 230395,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070552, "tid": 2070552, + "ts": 5327096673176.813, "dur": 24.166, + "args": { + "External id": 230396,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673178.742, "dur": 4.164, + "args": { + "External id": 230397,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673183.470, "dur": 17.219, + "args": { + "External id": 230398,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673188.095, "dur": 12.258, + "args": { + "External id": 230399,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096673206.849, "dur": 33.588, + "args": { + "External id": 230400,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673236.873, "dur": 1.444, + "args": { + "External id": 230401,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096673245.236, "dur": 82.595, + "args": { + "External id": 230402,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096673246.417, "dur": 6.968, + "args": { + "External id": 230403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096673247.958, "dur": 4.542, + "args": { + "External id": 230404,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673251.499, "dur": 0.825, + "args": { + "External id": 230405,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096673254.379, "dur": 72.758, + "args": { + "External id": 230406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096673255.660, "dur": 70.448, + "args": { + "External id": 230407,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096673331.084, "dur": 3.917, + "args": { + "External id": 230408,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673333.399, "dur": 0.705, + "args": { + "External id": 230409,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "0"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096673339.414, "dur": 1.593, + "args": { + "External id": 230410,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673348.695, "dur": 5.159, + "args": { + "External id": 230411,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673350.393, "dur": 3.197, + "args": { + "External id": 230412,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096673442.554, "dur": 231.843, + "args": { + "External id": 230413,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673444.582, "dur": 3.551, + "args": { + "External id": 230414,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096673450.067, "dur": 223.527, + "args": { + "External id": 230415,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096673451.321, "dur": 0.387, + "args": { + "External id": 230416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096673455.104, "dur": 23.960, + "args": { + "External id": 230417,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096673480.832, "dur": 4.901, + "args": { + "External id": 230418,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673484.612, "dur": 0.912, + "args": { + "External id": 230419,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096673486.786, "dur": 21.012, + "args": { + "External id": 230420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673487.762, "dur": 1.241, + "args": { + "External id": 230421,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096673490.135, "dur": 17.383, + "args": { + "External id": 230422,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673493.651, "dur": 3.558, + "args": { + "External id": 230423,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096673509.205, "dur": 21.683, + "args": { + "External id": 230424,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673532.906, "dur": 12.629, + "args": { + "External id": 230425,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096673550.555, "dur": 14.102, + "args": { + "External id": 230426,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673568.574, "dur": 14.656, + "args": { + "External id": 230427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096673584.883, "dur": 21.249, + "args": { + "External id": 230428,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673588.626, "dur": 1.286, + "args": { + "External id": 230429,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673591.928, "dur": 0.562, + "args": { + "External id": 230430,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673607.401, "dur": 11.520, + "args": { + "External id": 230431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673620.105, "dur": 51.153, + "args": { + "External id": 230432,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096673685.053, "dur": 2.458, + "args": { + "External id": 230433,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096673695.122, "dur": 1.448, + "args": { + "External id": 230434,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673702.589, "dur": 4.798, + "args": { + "External id": 230435,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673704.026, "dur": 3.119, + "args": { + "External id": 230436,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096673787.448, "dur": 174.098, + "args": { + "External id": 230437,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673789.023, "dur": 2.118, + "args": { + "External id": 230438,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096673794.765, "dur": 166.502, + "args": { + "External id": 230439,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096673795.856, "dur": 0.359, + "args": { + "External id": 230440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096673796.887, "dur": 21.059, + "args": { + "External id": 230441,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096673819.356, "dur": 2.903, + "args": { + "External id": 230442,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673821.081, "dur": 0.951, + "args": { + "External id": 230443,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096673822.839, "dur": 22.519, + "args": { + "External id": 230444,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096673823.822, "dur": 1.789, + "args": { + "External id": 230445,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096673826.604, "dur": 18.461, + "args": { + "External id": 230446,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673830.053, "dur": 1.895, + "args": { + "External id": 230447,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096673848.513, "dur": 17.651, + "args": { + "External id": 230448,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673867.473, "dur": 13.215, + "args": { + "External id": 230449,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096673882.829, "dur": 12.985, + "args": { + "External id": 230450,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673896.888, "dur": 12.239, + "args": { + "External id": 230451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096673910.600, "dur": 17.621, + "args": { + "External id": 230452,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673912.087, "dur": 1.056, + "args": { + "External id": 230453,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673914.673, "dur": 0.759, + "args": { + "External id": 230454,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673929.292, "dur": 11.855, + "args": { + "External id": 230455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096673949.723, "dur": 10.562, + "args": { + "External id": 230456,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096673968.195, "dur": 1.306, + "args": { + "External id": 230457,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096673994.515, "dur": 37.045, + "args": { + "External id": 230458,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096673997.148, "dur": 8.893, + "args": { + "External id": 230459,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096673999.665, "dur": 5.727, + "args": { + "External id": 230460,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096674008.199, "dur": 22.649, + "args": { + "External id": 230461,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096674039.021, "dur": 5.265, + "args": { + "External id": 230462,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674042.102, "dur": 1.023, + "args": { + "External id": 230463,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096674118.458, "dur": 85.810, + "args": { + "External id": 230464,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096674209.612, "dur": 6.015, + "args": { + "External id": 230465,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674213.202, "dur": 1.134, + "args": { + "External id": 230466,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674217.299, "dur": 26.090, + "args": { + "External id": 230467,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096674250.843, "dur": 6.258, + "args": { + "External id": 230468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096674252.788, "dur": 3.558, + "args": { + "External id": 230469,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674255.200, "dur": 0.941, + "args": { + "External id": 230470,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096674260.235, "dur": 44.077, + "args": { + "External id": 230471,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096674261.663, "dur": 41.983, + "args": { + "External id": 230472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674308.748, "dur": 15.307, + "args": { + "External id": 230473,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096674330.027, "dur": 6.625, + "args": { + "External id": 230474,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674334.302, "dur": 1.026, + "args": { + "External id": 230475,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096674341.279, "dur": 49.278, + "args": { + "External id": 230476,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096674342.280, "dur": 3.744, + "args": { + "External id": 230477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096674343.202, "dur": 2.272, + "args": { + "External id": 230478,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674344.495, "dur": 0.809, + "args": { + "External id": 230479,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096674346.854, "dur": 43.348, + "args": { + "External id": 230480,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096674349.548, "dur": 40.097, + "args": { + "External id": 230481,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096674394.168, "dur": 3.668, + "args": { + "External id": 230482,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674396.254, "dur": 0.723, + "args": { + "External id": 230483,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "262144000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096674403.651, "dur": 2.303, + "args": { + "External id": 230484,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674414.593, "dur": 6.546, + "args": { + "External id": 230485,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674416.523, "dur": 4.319, + "args": { + "External id": 230486,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096674505.210, "dur": 238.394, + "args": { + "External id": 230487,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674508.921, "dur": 1.671, + "args": { + "External id": 230488,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096674512.475, "dur": 230.656, + "args": { + "External id": 230489,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096674513.676, "dur": 0.309, + "args": { + "External id": 230490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096674515.097, "dur": 19.684, + "args": { + "External id": 230491,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096674538.738, "dur": 4.683, + "args": { + "External id": 230492,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674542.226, "dur": 0.950, + "args": { + "External id": 230493,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096674544.300, "dur": 23.106, + "args": { + "External id": 230494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674545.928, "dur": 1.357, + "args": { + "External id": 230495,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096674548.404, "dur": 18.757, + "args": { + "External id": 230496,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674550.961, "dur": 3.014, + "args": { + "External id": 230497,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096674568.841, "dur": 20.386, + "args": { + "External id": 230498,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674590.765, "dur": 14.640, + "args": { + "External id": 230499,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096674608.034, "dur": 53.112, + "args": { + "External id": 230500,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674663.782, "dur": 16.191, + "args": { + "External id": 230501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096674683.799, "dur": 24.107, + "args": { + "External id": 230502,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674688.060, "dur": 1.611, + "args": { + "External id": 230503,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674691.767, "dur": 0.903, + "args": { + "External id": 230504,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674709.318, "dur": 12.780, + "args": { + "External id": 230505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674723.277, "dur": 18.497, + "args": { + "External id": 230506,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096674751.870, "dur": 3.588, + "args": { + "External id": 230507,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096674768.314, "dur": 1.313, + "args": { + "External id": 230508,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674775.934, "dur": 4.489, + "args": { + "External id": 230509,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674777.214, "dur": 2.988, + "args": { + "External id": 230510,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096674857.867, "dur": 177.063, + "args": { + "External id": 230511,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674859.430, "dur": 2.054, + "args": { + "External id": 230512,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096674864.989, "dur": 169.417, + "args": { + "External id": 230513,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096674865.915, "dur": 0.226, + "args": { + "External id": 230514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096674867.169, "dur": 19.913, + "args": { + "External id": 230515,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096674888.705, "dur": 3.130, + "args": { + "External id": 230516,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674890.664, "dur": 0.880, + "args": { + "External id": 230517,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096674892.759, "dur": 19.605, + "args": { + "External id": 230518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096674893.971, "dur": 1.244, + "args": { + "External id": 230519,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096674896.182, "dur": 15.880, + "args": { + "External id": 230520,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674899.594, "dur": 1.464, + "args": { + "External id": 230521,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096674913.632, "dur": 16.983, + "args": { + "External id": 230522,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674931.942, "dur": 11.108, + "args": { + "External id": 230523,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096674947.176, "dur": 11.280, + "args": { + "External id": 230524,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674959.338, "dur": 10.554, + "args": { + "External id": 230525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096674971.157, "dur": 35.668, + "args": { + "External id": 230526,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096674972.996, "dur": 1.232, + "args": { + "External id": 230527,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096674991.577, "dur": 1.020, + "args": { + "External id": 230528,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675008.577, "dur": 10.563, + "args": { + "External id": 230529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675022.430, "dur": 10.582, + "args": { + "External id": 230530,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096675041.192, "dur": 1.878, + "args": { + "External id": 230531,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096675051.578, "dur": 23.838, + "args": { + "External id": 230532,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096675054.377, "dur": 6.373, + "args": { + "External id": 230533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675056.419, "dur": 3.928, + "args": { + "External id": 230534,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096675062.170, "dur": 12.524, + "args": { + "External id": 230535,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096675082.621, "dur": 5.244, + "args": { + "External id": 230536,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675085.550, "dur": 1.232, + "args": { + "External id": 230537,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096675156.509, "dur": 62.407, + "args": { + "External id": 230538,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096675224.491, "dur": 7.557, + "args": { + "External id": 230539,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675228.238, "dur": 2.637, + "args": { + "External id": 230540,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675236.169, "dur": 26.204, + "args": { + "External id": 230541,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096675267.011, "dur": 5.511, + "args": { + "External id": 230542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096675268.816, "dur": 2.948, + "args": { + "External id": 230543,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675270.592, "dur": 0.936, + "args": { + "External id": 230544,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096675275.605, "dur": 41.946, + "args": { + "External id": 230545,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096675276.729, "dur": 40.234, + "args": { + "External id": 230546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675322.438, "dur": 14.129, + "args": { + "External id": 230547,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096675344.257, "dur": 4.311, + "args": { + "External id": 230548,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675346.438, "dur": 0.839, + "args": { + "External id": 230549,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096675352.965, "dur": 49.975, + "args": { + "External id": 230550,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096675353.887, "dur": 3.300, + "args": { + "External id": 230551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096675354.594, "dur": 2.015, + "args": { + "External id": 230552,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675355.752, "dur": 0.719, + "args": { + "External id": 230553,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096675359.916, "dur": 42.610, + "args": { + "External id": 230554,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096675360.836, "dur": 41.051, + "args": { + "External id": 230555,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096675406.590, "dur": 8.546, + "args": { + "External id": 230556,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675413.271, "dur": 0.924, + "args": { + "External id": 230557,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "524288000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096675420.620, "dur": 2.044, + "args": { + "External id": 230558,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675431.358, "dur": 5.938, + "args": { + "External id": 230559,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675433.252, "dur": 3.765, + "args": { + "External id": 230560,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096675513.738, "dur": 229.764, + "args": { + "External id": 230561,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675517.424, "dur": 1.942, + "args": { + "External id": 230562,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096675520.903, "dur": 222.086, + "args": { + "External id": 230563,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096675522.204, "dur": 0.519, + "args": { + "External id": 230564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096675523.672, "dur": 19.791, + "args": { + "External id": 230565,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096675545.143, "dur": 6.044, + "args": { + "External id": 230566,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675549.666, "dur": 1.330, + "args": { + "External id": 230567,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096675554.159, "dur": 21.433, + "args": { + "External id": 230568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675556.166, "dur": 1.508, + "args": { + "External id": 230569,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096675558.662, "dur": 16.679, + "args": { + "External id": 230570,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675561.115, "dur": 2.201, + "args": { + "External id": 230571,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096675576.721, "dur": 20.940, + "args": { + "External id": 230572,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675599.346, "dur": 14.610, + "args": { + "External id": 230573,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096675616.562, "dur": 57.524, + "args": { + "External id": 230574,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675676.528, "dur": 14.444, + "args": { + "External id": 230575,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096675693.062, "dur": 24.550, + "args": { + "External id": 230576,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675697.699, "dur": 2.047, + "args": { + "External id": 230577,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675702.056, "dur": 0.890, + "args": { + "External id": 230578,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675719.109, "dur": 10.938, + "args": { + "External id": 230579,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675730.946, "dur": 10.732, + "args": { + "External id": 230580,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096675750.965, "dur": 2.301, + "args": { + "External id": 230581,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096675764.250, "dur": 1.204, + "args": { + "External id": 230582,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675771.555, "dur": 4.238, + "args": { + "External id": 230583,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675772.892, "dur": 2.683, + "args": { + "External id": 230584,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096675846.852, "dur": 166.210, + "args": { + "External id": 230585,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675848.398, "dur": 2.306, + "args": { + "External id": 230586,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096675851.982, "dur": 160.647, + "args": { + "External id": 230587,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096675853.011, "dur": 0.341, + "args": { + "External id": 230588,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096675856.230, "dur": 16.853, + "args": { + "External id": 230589,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096675874.497, "dur": 3.113, + "args": { + "External id": 230590,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675876.376, "dur": 1.000, + "args": { + "External id": 230591,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096675878.432, "dur": 18.457, + "args": { + "External id": 230592,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096675879.738, "dur": 1.704, + "args": { + "External id": 230593,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096675882.313, "dur": 14.316, + "args": { + "External id": 230594,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675885.222, "dur": 1.506, + "args": { + "External id": 230595,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096675897.937, "dur": 15.375, + "args": { + "External id": 230596,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675914.563, "dur": 10.689, + "args": { + "External id": 230597,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096675927.303, "dur": 11.567, + "args": { + "External id": 230598,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675939.736, "dur": 10.616, + "args": { + "External id": 230599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096675953.463, "dur": 15.913, + "args": { + "External id": 230600,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675955.158, "dur": 0.727, + "args": { + "External id": 230601,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096675957.592, "dur": 0.866, + "args": { + "External id": 230602,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675970.452, "dur": 26.656, + "args": { + "External id": 230603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096675999.183, "dur": 12.199, + "args": { + "External id": 230604,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096676021.454, "dur": 2.095, + "args": { + "External id": 230605,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096676031.353, "dur": 28.817, + "args": { + "External id": 230606,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096676034.285, "dur": 7.399, + "args": { + "External id": 230607,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676036.643, "dur": 4.600, + "args": { + "External id": 230608,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096676043.462, "dur": 16.019, + "args": { + "External id": 230609,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096676066.958, "dur": 4.895, + "args": { + "External id": 230610,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676069.780, "dur": 0.983, + "args": { + "External id": 230611,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096676138.499, "dur": 63.449, + "args": { + "External id": 230612,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096676207.324, "dur": 5.972, + "args": { + "External id": 230613,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676210.728, "dur": 1.308, + "args": { + "External id": 230614,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676214.870, "dur": 25.548, + "args": { + "External id": 230615,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096676244.867, "dur": 8.143, + "args": { + "External id": 230616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096676246.602, "dur": 5.708, + "args": { + "External id": 230617,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676250.912, "dur": 1.182, + "args": { + "External id": 230618,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096676255.958, "dur": 40.113, + "args": { + "External id": 230619,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096676257.234, "dur": 38.296, + "args": { + "External id": 230620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676300.565, "dur": 15.205, + "args": { + "External id": 230621,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096676321.043, "dur": 4.552, + "args": { + "External id": 230622,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676323.523, "dur": 0.838, + "args": { + "External id": 230623,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096676332.017, "dur": 47.594, + "args": { + "External id": 230624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096676333.076, "dur": 4.101, + "args": { + "External id": 230625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096676333.771, "dur": 2.827, + "args": { + "External id": 230626,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676335.811, "dur": 0.613, + "args": { + "External id": 230627,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096676337.796, "dur": 41.437, + "args": { + "External id": 230628,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096676338.574, "dur": 40.177, + "args": { + "External id": 230629,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096676385.597, "dur": 3.508, + "args": { + "External id": 230630,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676387.743, "dur": 0.536, + "args": { + "External id": 230631,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "786432000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096676393.911, "dur": 1.640, + "args": { + "External id": 230632,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676401.542, "dur": 6.797, + "args": { + "External id": 230633,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676405.175, "dur": 2.909, + "args": { + "External id": 230634,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096676482.309, "dur": 217.760, + "args": { + "External id": 230635,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676484.258, "dur": 2.268, + "args": { + "External id": 230636,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096676489.832, "dur": 209.864, + "args": { + "External id": 230637,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096676491.119, "dur": 0.532, + "args": { + "External id": 230638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096676492.690, "dur": 21.066, + "args": { + "External id": 230639,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096676515.334, "dur": 4.729, + "args": { + "External id": 230640,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676517.450, "dur": 2.357, + "args": { + "External id": 230641,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096676520.790, "dur": 22.997, + "args": { + "External id": 230642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676522.241, "dur": 1.007, + "args": { + "External id": 230643,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096676526.513, "dur": 17.011, + "args": { + "External id": 230644,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676529.138, "dur": 2.671, + "args": { + "External id": 230645,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096676545.096, "dur": 17.424, + "args": { + "External id": 230646,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676563.773, "dur": 13.129, + "args": { + "External id": 230647,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096676579.276, "dur": 13.086, + "args": { + "External id": 230648,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676593.448, "dur": 12.252, + "args": { + "External id": 230649,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096676607.322, "dur": 61.916, + "args": { + "External id": 230650,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676609.119, "dur": 0.764, + "args": { + "External id": 230651,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676614.093, "dur": 1.078, + "args": { + "External id": 230652,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676671.827, "dur": 14.043, + "args": { + "External id": 230653,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676687.075, "dur": 11.379, + "args": { + "External id": 230654,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096676707.912, "dur": 2.241, + "args": { + "External id": 230655,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096676719.594, "dur": 1.433, + "args": { + "External id": 230656,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676728.896, "dur": 4.749, + "args": { + "External id": 230657,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676730.384, "dur": 3.023, + "args": { + "External id": 230658,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096676801.385, "dur": 160.999, + "args": { + "External id": 230659,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676804.394, "dur": 1.891, + "args": { + "External id": 230660,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096676807.404, "dur": 154.638, + "args": { + "External id": 230661,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096676808.741, "dur": 0.447, + "args": { + "External id": 230662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096676810.193, "dur": 17.743, + "args": { + "External id": 230663,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096676831.157, "dur": 6.857, + "args": { + "External id": 230664,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676836.908, "dur": 0.764, + "args": { + "External id": 230665,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096676838.692, "dur": 18.340, + "args": { + "External id": 230666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096676839.725, "dur": 1.165, + "args": { + "External id": 230667,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096676841.803, "dur": 15.013, + "args": { + "External id": 230668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676843.822, "dur": 2.004, + "args": { + "External id": 230669,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096676860.041, "dur": 17.444, + "args": { + "External id": 230670,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676878.714, "dur": 10.355, + "args": { + "External id": 230671,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096676891.214, "dur": 11.618, + "args": { + "External id": 230672,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676903.704, "dur": 10.505, + "args": { + "External id": 230673,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096676917.315, "dur": 18.709, + "args": { + "External id": 230674,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676920.930, "dur": 0.987, + "args": { + "External id": 230675,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676923.827, "dur": 0.725, + "args": { + "External id": 230676,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676937.284, "dur": 11.022, + "args": { + "External id": 230677,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096676949.113, "dur": 11.932, + "args": { + "External id": 230678,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096676967.268, "dur": 1.288, + "args": { + "External id": 230679,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096676975.657, "dur": 43.776, + "args": { + "External id": 230680,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096676993.540, "dur": 7.490, + "args": { + "External id": 230681,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096676995.667, "dur": 4.508, + "args": { + "External id": 230682,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096677002.482, "dur": 16.280, + "args": { + "External id": 230683,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096677027.099, "dur": 4.855, + "args": { + "External id": 230684,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677029.916, "dur": 0.868, + "args": { + "External id": 230685,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096677100.380, "dur": 64.198, + "args": { + "External id": 230686,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096677169.446, "dur": 5.078, + "args": { + "External id": 230687,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677172.439, "dur": 0.972, + "args": { + "External id": 230688,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677176.139, "dur": 24.434, + "args": { + "External id": 230689,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096677205.071, "dur": 7.599, + "args": { + "External id": 230690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096677206.666, "dur": 5.285, + "args": { + "External id": 230691,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677210.790, "dur": 0.934, + "args": { + "External id": 230692,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096677215.776, "dur": 40.624, + "args": { + "External id": 230693,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096677217.215, "dur": 38.626, + "args": { + "External id": 230694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677260.229, "dur": 15.079, + "args": { + "External id": 230695,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096677280.708, "dur": 4.118, + "args": { + "External id": 230696,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677282.914, "dur": 0.726, + "args": { + "External id": 230697,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096677288.782, "dur": 50.190, + "args": { + "External id": 230698,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096677291.795, "dur": 5.595, + "args": { + "External id": 230699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096677292.629, "dur": 4.136, + "args": { + "External id": 230700,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677294.226, "dur": 2.333, + "args": { + "External id": 230701,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096677297.964, "dur": 40.677, + "args": { + "External id": 230702,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096677298.665, "dur": 39.444, + "args": { + "External id": 230703,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096677342.823, "dur": 3.365, + "args": { + "External id": 230704,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677344.668, "dur": 0.585, + "args": { + "External id": 230705,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1048576000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096677353.985, "dur": 1.744, + "args": { + "External id": 230706,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677362.175, "dur": 5.516, + "args": { + "External id": 230707,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677363.949, "dur": 3.412, + "args": { + "External id": 230708,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096677441.873, "dur": 174.728, + "args": { + "External id": 230709,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677443.986, "dur": 1.829, + "args": { + "External id": 230710,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096677449.184, "dur": 166.982, + "args": { + "External id": 230711,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096677450.204, "dur": 0.489, + "args": { + "External id": 230712,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096677451.836, "dur": 22.219, + "args": { + "External id": 230713,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096677475.687, "dur": 3.211, + "args": { + "External id": 230714,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677477.857, "dur": 0.795, + "args": { + "External id": 230715,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096677479.781, "dur": 21.617, + "args": { + "External id": 230716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677481.210, "dur": 1.628, + "args": { + "External id": 230717,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096677483.868, "dur": 17.254, + "args": { + "External id": 230718,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677487.531, "dur": 1.997, + "args": { + "External id": 230719,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096677502.705, "dur": 19.873, + "args": { + "External id": 230720,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677524.138, "dur": 13.266, + "args": { + "External id": 230721,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096677541.749, "dur": 13.270, + "args": { + "External id": 230722,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677556.223, "dur": 13.124, + "args": { + "External id": 230723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096677570.713, "dur": 19.266, + "args": { + "External id": 230724,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677572.510, "dur": 1.622, + "args": { + "External id": 230725,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677575.936, "dur": 0.770, + "args": { + "External id": 230726,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677591.001, "dur": 10.961, + "args": { + "External id": 230727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677605.032, "dur": 10.141, + "args": { + "External id": 230728,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096677661.041, "dur": 2.379, + "args": { + "External id": 230729,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096677675.211, "dur": 1.272, + "args": { + "External id": 230730,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677682.715, "dur": 4.841, + "args": { + "External id": 230731,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677684.089, "dur": 3.223, + "args": { + "External id": 230732,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096677762.605, "dur": 160.736, + "args": { + "External id": 230733,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677764.440, "dur": 1.648, + "args": { + "External id": 230734,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096677767.472, "dur": 155.515, + "args": { + "External id": 230735,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096677770.425, "dur": 0.299, + "args": { + "External id": 230736,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096677771.493, "dur": 17.957, + "args": { + "External id": 230737,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096677790.805, "dur": 3.056, + "args": { + "External id": 230738,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677792.562, "dur": 1.085, + "args": { + "External id": 230739,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096677794.476, "dur": 22.468, + "args": { + "External id": 230740,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096677797.498, "dur": 1.084, + "args": { + "External id": 230741,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096677799.550, "dur": 17.170, + "args": { + "External id": 230742,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677804.015, "dur": 1.926, + "args": { + "External id": 230743,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096677818.006, "dur": 16.287, + "args": { + "External id": 230744,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677835.511, "dur": 12.206, + "args": { + "External id": 230745,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096677849.885, "dur": 12.600, + "args": { + "External id": 230746,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677863.467, "dur": 12.665, + "args": { + "External id": 230747,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096677877.124, "dur": 18.186, + "args": { + "External id": 230748,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677878.722, "dur": 0.922, + "args": { + "External id": 230749,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677881.330, "dur": 0.581, + "args": { + "External id": 230750,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677898.656, "dur": 11.673, + "args": { + "External id": 230751,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096677911.092, "dur": 11.108, + "args": { + "External id": 230752,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096677927.972, "dur": 1.153, + "args": { + "External id": 230753,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096677937.047, "dur": 26.389, + "args": { + "External id": 230754,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096677939.950, "dur": 7.342, + "args": { + "External id": 230755,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677942.329, "dur": 4.540, + "args": { + "External id": 230756,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096677949.043, "dur": 13.777, + "args": { + "External id": 230757,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096677970.135, "dur": 4.488, + "args": { + "External id": 230758,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096677972.842, "dur": 0.894, + "args": { + "External id": 230759,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096678059.402, "dur": 69.005, + "args": { + "External id": 230760,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096678134.471, "dur": 6.212, + "args": { + "External id": 230761,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678137.479, "dur": 1.609, + "args": { + "External id": 230762,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678144.359, "dur": 25.062, + "args": { + "External id": 230763,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096678174.317, "dur": 6.227, + "args": { + "External id": 230764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096678176.422, "dur": 3.395, + "args": { + "External id": 230765,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678178.546, "dur": 1.091, + "args": { + "External id": 230766,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096678183.371, "dur": 43.050, + "args": { + "External id": 230767,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096678184.838, "dur": 40.930, + "args": { + "External id": 230768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678231.165, "dur": 14.237, + "args": { + "External id": 230769,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096678253.382, "dur": 4.325, + "args": { + "External id": 230770,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678255.808, "dur": 0.720, + "args": { + "External id": 230771,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096678261.956, "dur": 49.670, + "args": { + "External id": 230772,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096678263.091, "dur": 4.191, + "args": { + "External id": 230773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096678264.026, "dur": 2.661, + "args": { + "External id": 230774,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678265.708, "dur": 0.848, + "args": { + "External id": 230775,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096678269.655, "dur": 41.661, + "args": { + "External id": 230776,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096678270.466, "dur": 40.373, + "args": { + "External id": 230777,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096678315.116, "dur": 3.787, + "args": { + "External id": 230778,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678317.033, "dur": 0.956, + "args": { + "External id": 230779,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1310720000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096678324.577, "dur": 1.819, + "args": { + "External id": 230780,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678334.314, "dur": 5.937, + "args": { + "External id": 230781,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678335.900, "dur": 4.077, + "args": { + "External id": 230782,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096678414.804, "dur": 163.614, + "args": { + "External id": 230783,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678418.398, "dur": 1.953, + "args": { + "External id": 230784,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096678421.758, "dur": 156.213, + "args": { + "External id": 230785,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096678422.945, "dur": 0.418, + "args": { + "External id": 230786,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096678426.597, "dur": 19.359, + "args": { + "External id": 230787,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096678447.651, "dur": 4.477, + "args": { + "External id": 230788,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678450.633, "dur": 1.282, + "args": { + "External id": 230789,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096678452.911, "dur": 19.320, + "args": { + "External id": 230790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678454.361, "dur": 1.102, + "args": { + "External id": 230791,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096678456.295, "dur": 15.614, + "args": { + "External id": 230792,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678458.707, "dur": 2.408, + "args": { + "External id": 230793,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096678473.354, "dur": 17.300, + "args": { + "External id": 230794,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678491.774, "dur": 12.135, + "args": { + "External id": 230795,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096678506.256, "dur": 11.551, + "args": { + "External id": 230796,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678521.603, "dur": 10.560, + "args": { + "External id": 230797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096678533.696, "dur": 20.180, + "args": { + "External id": 230798,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678537.352, "dur": 0.851, + "args": { + "External id": 230799,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678540.483, "dur": 0.820, + "args": { + "External id": 230800,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678555.130, "dur": 10.178, + "args": { + "External id": 230801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678566.295, "dur": 10.852, + "args": { + "External id": 230802,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096678585.828, "dur": 1.584, + "args": { + "External id": 230803,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096678595.912, "dur": 1.229, + "args": { + "External id": 230804,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678602.171, "dur": 3.718, + "args": { + "External id": 230805,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678603.489, "dur": 2.104, + "args": { + "External id": 230806,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096678715.077, "dur": 169.463, + "args": { + "External id": 230807,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678717.445, "dur": 2.731, + "args": { + "External id": 230808,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096678721.372, "dur": 162.811, + "args": { + "External id": 230809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096678722.477, "dur": 0.407, + "args": { + "External id": 230810,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096678723.821, "dur": 20.009, + "args": { + "External id": 230811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096678745.031, "dur": 3.610, + "args": { + "External id": 230812,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678747.393, "dur": 1.021, + "args": { + "External id": 230813,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096678749.215, "dur": 20.498, + "args": { + "External id": 230814,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096678750.416, "dur": 1.735, + "args": { + "External id": 230815,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096678753.338, "dur": 16.130, + "args": { + "External id": 230816,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678758.498, "dur": 1.604, + "args": { + "External id": 230817,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096678770.670, "dur": 17.626, + "args": { + "External id": 230818,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678789.534, "dur": 13.077, + "args": { + "External id": 230819,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096678804.847, "dur": 14.026, + "args": { + "External id": 230820,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678819.971, "dur": 14.841, + "args": { + "External id": 230821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096678836.235, "dur": 18.818, + "args": { + "External id": 230822,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678837.824, "dur": 1.049, + "args": { + "External id": 230823,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678840.996, "dur": 0.953, + "args": { + "External id": 230824,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678858.509, "dur": 11.888, + "args": { + "External id": 230825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096678871.493, "dur": 11.660, + "args": { + "External id": 230826,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096678889.829, "dur": 1.589, + "args": { + "External id": 230827,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096678899.478, "dur": 24.010, + "args": { + "External id": 230828,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096678901.692, "dur": 6.297, + "args": { + "External id": 230829,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678903.823, "dur": 3.766, + "args": { + "External id": 230830,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096678909.264, "dur": 13.598, + "args": { + "External id": 230831,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096678930.019, "dur": 4.668, + "args": { + "External id": 230832,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096678932.957, "dur": 0.772, + "args": { + "External id": 230833,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096679018.996, "dur": 67.818, + "args": { + "External id": 230834,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096679093.027, "dur": 8.506, + "args": { + "External id": 230835,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679096.612, "dur": 3.331, + "args": { + "External id": 230836,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679105.323, "dur": 23.889, + "args": { + "External id": 230837,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096679134.486, "dur": 6.665, + "args": { + "External id": 230838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096679136.863, "dur": 3.566, + "args": { + "External id": 230839,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679139.011, "dur": 1.209, + "args": { + "External id": 230840,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096679144.320, "dur": 72.133, + "args": { + "External id": 230841,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096679145.601, "dur": 70.197, + "args": { + "External id": 230842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679220.685, "dur": 38.912, + "args": { + "External id": 230843,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096679267.806, "dur": 4.080, + "args": { + "External id": 230844,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679269.947, "dur": 0.729, + "args": { + "External id": 230845,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096679276.155, "dur": 63.448, + "args": { + "External id": 230846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096679277.500, "dur": 3.781, + "args": { + "External id": 230847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096679278.301, "dur": 2.343, + "args": { + "External id": 230848,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679279.743, "dur": 0.714, + "args": { + "External id": 230849,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096679284.229, "dur": 55.032, + "args": { + "External id": 230850,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096679284.662, "dur": 54.126, + "args": { + "External id": 230851,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096679343.541, "dur": 3.971, + "args": { + "External id": 230852,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679345.462, "dur": 1.137, + "args": { + "External id": 230853,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1572864000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096679353.518, "dur": 1.991, + "args": { + "External id": 230854,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679362.158, "dur": 7.619, + "args": { + "External id": 230855,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679365.474, "dur": 4.042, + "args": { + "External id": 230856,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096679443.951, "dur": 165.038, + "args": { + "External id": 230857,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679446.456, "dur": 1.845, + "args": { + "External id": 230858,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096679449.504, "dur": 159.054, + "args": { + "External id": 230859,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096679452.916, "dur": 0.278, + "args": { + "External id": 230860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096679454.392, "dur": 19.799, + "args": { + "External id": 230861,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096679475.982, "dur": 3.891, + "args": { + "External id": 230862,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679478.333, "dur": 1.334, + "args": { + "External id": 230863,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096679482.704, "dur": 20.168, + "args": { + "External id": 230864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679485.335, "dur": 1.167, + "args": { + "External id": 230865,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096679487.460, "dur": 15.122, + "args": { + "External id": 230866,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679490.088, "dur": 2.574, + "args": { + "External id": 230867,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096679503.881, "dur": 17.005, + "args": { + "External id": 230868,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679522.166, "dur": 11.540, + "args": { + "External id": 230869,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096679535.875, "dur": 12.210, + "args": { + "External id": 230870,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679549.284, "dur": 11.403, + "args": { + "External id": 230871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096679562.426, "dur": 20.503, + "args": { + "External id": 230872,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679566.123, "dur": 1.499, + "args": { + "External id": 230873,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679569.639, "dur": 0.838, + "args": { + "External id": 230874,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679584.453, "dur": 10.609, + "args": { + "External id": 230875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679596.091, "dur": 11.463, + "args": { + "External id": 230876,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096679614.583, "dur": 1.459, + "args": { + "External id": 230877,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096679665.851, "dur": 2.006, + "args": { + "External id": 230878,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679676.385, "dur": 5.604, + "args": { + "External id": 230879,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679678.319, "dur": 3.424, + "args": { + "External id": 230880,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096679751.310, "dur": 152.775, + "args": { + "External id": 230881,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679755.129, "dur": 1.841, + "args": { + "External id": 230882,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096679758.227, "dur": 145.550, + "args": { + "External id": 230883,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096679759.505, "dur": 0.288, + "args": { + "External id": 230884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096679761.023, "dur": 17.274, + "args": { + "External id": 230885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096679781.632, "dur": 5.498, + "args": { + "External id": 230886,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679785.749, "dur": 1.046, + "args": { + "External id": 230887,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096679787.881, "dur": 17.439, + "args": { + "External id": 230888,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096679788.941, "dur": 1.140, + "args": { + "External id": 230889,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096679791.022, "dur": 14.053, + "args": { + "External id": 230890,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679793.077, "dur": 1.998, + "args": { + "External id": 230891,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096679806.208, "dur": 14.953, + "args": { + "External id": 230892,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679822.255, "dur": 12.648, + "args": { + "External id": 230893,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096679836.961, "dur": 11.231, + "args": { + "External id": 230894,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679849.123, "dur": 10.577, + "args": { + "External id": 230895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096679863.118, "dur": 16.452, + "args": { + "External id": 230896,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679865.079, "dur": 0.787, + "args": { + "External id": 230897,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679867.656, "dur": 0.778, + "args": { + "External id": 230898,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679880.644, "dur": 10.238, + "args": { + "External id": 230899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096679891.674, "dur": 11.216, + "args": { + "External id": 230900,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096679908.939, "dur": 1.295, + "args": { + "External id": 230901,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096679919.316, "dur": 29.667, + "args": { + "External id": 230902,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096679921.891, "dur": 8.640, + "args": { + "External id": 230903,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679926.602, "dur": 3.514, + "args": { + "External id": 230904,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096679931.823, "dur": 16.380, + "args": { + "External id": 230905,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096679955.737, "dur": 4.630, + "args": { + "External id": 230906,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096679958.716, "dur": 0.643, + "args": { + "External id": 230907,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096680042.235, "dur": 63.614, + "args": { + "External id": 230908,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096680111.380, "dur": 6.222, + "args": { + "External id": 230909,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680114.557, "dur": 1.394, + "args": { + "External id": 230910,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680118.988, "dur": 27.209, + "args": { + "External id": 230911,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096680151.870, "dur": 34.811, + "args": { + "External id": 230912,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096680179.789, "dur": 5.931, + "args": { + "External id": 230913,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680184.671, "dur": 0.882, + "args": { + "External id": 230914,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096680189.852, "dur": 41.270, + "args": { + "External id": 230915,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096680190.911, "dur": 39.679, + "args": { + "External id": 230916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680235.255, "dur": 14.481, + "args": { + "External id": 230917,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096680255.281, "dur": 3.903, + "args": { + "External id": 230918,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680257.501, "dur": 0.503, + "args": { + "External id": 230919,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5327096680263.189, "dur": 49.117, + "args": { + "External id": 230920,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096680266.134, "dur": 4.782, + "args": { + "External id": 230921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096680266.825, "dur": 3.480, + "args": { + "External id": 230922,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680269.540, "dur": 0.633, + "args": { + "External id": 230923,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096680271.695, "dur": 40.307, + "args": { + "External id": 230924,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096680272.158, "dur": 39.174, + "args": { + "External id": 230925,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096680316.018, "dur": 4.894, + "args": { + "External id": 230926,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680319.332, "dur": 0.610, + "args": { + "External id": 230927,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1835008000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096680327.196, "dur": 2.039, + "args": { + "External id": 230928,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680336.204, "dur": 5.471, + "args": { + "External id": 230929,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680337.971, "dur": 3.447, + "args": { + "External id": 230930,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096680420.877, "dur": 187.798, + "args": { + "External id": 230931,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680422.828, "dur": 1.859, + "args": { + "External id": 230932,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096680426.142, "dur": 182.176, + "args": { + "External id": 230933,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096680427.006, "dur": 0.330, + "args": { + "External id": 230934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096680431.392, "dur": 23.110, + "args": { + "External id": 230935,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096680456.117, "dur": 3.557, + "args": { + "External id": 230936,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680458.522, "dur": 0.962, + "args": { + "External id": 230937,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096680460.525, "dur": 23.954, + "args": { + "External id": 230938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680463.440, "dur": 1.485, + "args": { + "External id": 230939,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096680465.768, "dur": 18.446, + "args": { + "External id": 230940,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680469.121, "dur": 2.575, + "args": { + "External id": 230941,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096680485.723, "dur": 19.067, + "args": { + "External id": 230942,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680506.006, "dur": 13.697, + "args": { + "External id": 230943,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096680521.959, "dur": 13.423, + "args": { + "External id": 230944,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680536.603, "dur": 22.121, + "args": { + "External id": 230945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096680560.401, "dur": 21.984, + "args": { + "External id": 230946,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680561.844, "dur": 0.913, + "args": { + "External id": 230947,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680567.311, "dur": 0.821, + "args": { + "External id": 230948,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680583.421, "dur": 12.349, + "args": { + "External id": 230949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680596.887, "dur": 10.417, + "args": { + "External id": 230950,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096680614.477, "dur": 1.418, + "args": { + "External id": 230951,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096680662.966, "dur": 1.779, + "args": { + "External id": 230952,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680673.139, "dur": 5.283, + "args": { + "External id": 230953,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680674.505, "dur": 3.658, + "args": { + "External id": 230954,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096680753.297, "dur": 164.339, + "args": { + "External id": 230955,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680755.293, "dur": 1.605, + "args": { + "External id": 230956,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5327096680759.791, "dur": 157.491, + "args": { + "External id": 230957,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5327096680761.096, "dur": 0.350, + "args": { + "External id": 230958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5327096680764.943, "dur": 18.434, + "args": { + "External id": 230959,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5327096680784.731, "dur": 4.986, + "args": { + "External id": 230960,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680787.174, "dur": 2.243, + "args": { + "External id": 230961,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096680790.531, "dur": 18.318, + "args": { + "External id": 230962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327096680791.173, "dur": 1.248, + "args": { + "External id": 230963,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327096680793.305, "dur": 15.290, + "args": { + "External id": 230964,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680795.884, "dur": 1.493, + "args": { + "External id": 230965,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327096680809.883, "dur": 17.589, + "args": { + "External id": 230966,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680828.636, "dur": 12.089, + "args": { + "External id": 230967,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5327096680843.133, "dur": 13.285, + "args": { + "External id": 230968,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680857.498, "dur": 11.472, + "args": { + "External id": 230969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096680872.187, "dur": 19.578, + "args": { + "External id": 230970,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680873.566, "dur": 1.113, + "args": { + "External id": 230971,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680876.442, "dur": 2.287, + "args": { + "External id": 230972,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680892.600, "dur": 11.866, + "args": { + "External id": 230973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096680905.227, "dur": 11.094, + "args": { + "External id": 230974,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327096680922.869, "dur": 1.226, + "args": { + "External id": 230975,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096680932.344, "dur": 23.778, + "args": { + "External id": 230976,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096680934.698, "dur": 6.159, + "args": { + "External id": 230977,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680937.106, "dur": 3.339, + "args": { + "External id": 230978,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5327096680942.091, "dur": 13.290, + "args": { + "External id": 230979,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096680964.667, "dur": 4.450, + "args": { + "External id": 230980,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096680967.511, "dur": 0.562, + "args": { + "External id": 230981,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096681056.683, "dur": 66.199, + "args": { + "External id": 230982,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5327096681129.076, "dur": 5.875, + "args": { + "External id": 230983,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096681132.467, "dur": 1.007, + "args": { + "External id": 230984,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327096681136.620, "dur": 22.571, + "args": { + "External id": 230985,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5327096681164.684, "dur": 8.107, + "args": { + "External id": 230986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5327096681168.498, "dur": 3.489, + "args": { + "External id": 230987,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096681170.745, "dur": 1.037, + "args": { + "External id": 230988,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5327096681175.743, "dur": 40.209, + "args": { + "External id": 230989,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5327096681177.035, "dur": 38.478, + "args": { + "External id": 230990,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5327096681220.187, "dur": 15.847, + "args": { + "External id": 230991,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096681241.502, "dur": 26.220, + "args": { + "External id": 230992,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5327096681244.004, "dur": 23.324, + "args": { + "External id": 230993,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096681250.861, "dur": 0.645, + "args": { + "External id": 230994,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5327096681278.231, "dur": 18.654, + "args": { + "External id": 230995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5327096681339.993, "dur": 18.635, + "args": { + "External id": 230996,"Sequence number": 959174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8480 + } + }, + { + "ph": "s", "id": 2, "pid": 2070552, "tid": 2070552, "ts": 5327096681339.993, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2070552, "tid": 2070552, + "ts": 5327096681460.886, "dur": 39.289, + "args": { + "External id": 230997,"Record function id": 0, "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5327096681602.482, "dur": 72.281, + "args": { + "External id": 230998,"Sequence number": 959175, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8482 + } + }, + { + "ph": "s", "id": 1, "pid": 2070552, "tid": 2070552, "ts": 5327096681602.482, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096681708.823, "dur": 36.395, + "args": { + "External id": 230999,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327096681710.538, "dur": 14.650, + "args": { + "External id": 231000,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327096681719.716, "dur": 4.773, + "args": { + "External id": 231001,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327096681726.787, "dur": 18.003, + "args": { + "External id": 231002,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5327098049848.787, "dur": 45.807, + "args": { + "External id": 231003,"Sequence number": 959176, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5327098049902.452, "dur": 17.337, + "args": { + "External id": 231004,"Sequence number": 959177, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5327098049927.684, "dur": 22.840, + "args": { + "External id": 231005,"Sequence number": 959178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5327098050290.915, "dur": 23.379, + "args": { + "External id": 231006,"Sequence number": 959179, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5327098050320.427, "dur": 39.017, + "args": { + "External id": 231007,"Sequence number": 959180, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070552, "tid": 2070552, + "ts": 5327098052200.954, "dur": 2950.561, + "args": { + "External id": 231008,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070552, "tid": 2070552, + "ts": 5327098052817.309, "dur": 904.239, + "args": { + "External id": 231009,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070552, "tid": 2070552, + "ts": 5327098052840.045, "dur": 69.744, + "args": { + "External id": 231010,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327098052843.349, "dur": 12.894, + "args": { + "External id": 231011,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5327098052859.125, "dur": 50.382, + "args": { + "External id": 231012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[36500]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5327098052861.002, "dur": 47.881, + "args": { + "External id": 231013,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[36500], []], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055183.201, "dur": 3.090, + "args": { + "External id": 231014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055188.519, "dur": 0.427, + "args": { + "External id": 231015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055190.323, "dur": 0.408, + "args": { + "External id": 231016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055191.752, "dur": 0.220, + "args": { + "External id": 231017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055192.966, "dur": 0.333, + "args": { + "External id": 231018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055194.592, "dur": 0.412, + "args": { + "External id": 231019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055196.361, "dur": 0.221, + "args": { + "External id": 231020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055199.194, "dur": 0.385, + "args": { + "External id": 231021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055200.439, "dur": 0.329, + "args": { + "External id": 231022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055202.026, "dur": 0.376, + "args": { + "External id": 231023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055203.175, "dur": 0.372, + "args": { + "External id": 231024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055204.348, "dur": 0.231, + "args": { + "External id": 231025,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055205.460, "dur": 0.332, + "args": { + "External id": 231026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055206.524, "dur": 0.312, + "args": { + "External id": 231027,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055207.574, "dur": 0.369, + "args": { + "External id": 231028,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055210.647, "dur": 0.326, + "args": { + "External id": 231029,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055211.721, "dur": 0.373, + "args": { + "External id": 231030,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055212.996, "dur": 0.208, + "args": { + "External id": 231031,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055213.952, "dur": 0.211, + "args": { + "External id": 231032,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055214.940, "dur": 0.224, + "args": { + "External id": 231033,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055215.922, "dur": 0.209, + "args": { + "External id": 231034,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055216.891, "dur": 0.206, + "args": { + "External id": 231035,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055218.032, "dur": 0.212, + "args": { + "External id": 231036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055220.962, "dur": 0.208, + "args": { + "External id": 231037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055222.061, "dur": 0.209, + "args": { + "External id": 231038,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055223.470, "dur": 0.206, + "args": { + "External id": 231039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055224.735, "dur": 0.209, + "args": { + "External id": 231040,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055225.868, "dur": 0.229, + "args": { + "External id": 231041,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055226.857, "dur": 0.206, + "args": { + "External id": 231042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055227.850, "dur": 0.205, + "args": { + "External id": 231043,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055228.819, "dur": 0.210, + "args": { + "External id": 231044,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055231.708, "dur": 0.206, + "args": { + "External id": 231045,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055232.688, "dur": 0.227, + "args": { + "External id": 231046,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055233.920, "dur": 0.219, + "args": { + "External id": 231047,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055235.053, "dur": 0.213, + "args": { + "External id": 231048,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055236.002, "dur": 0.203, + "args": { + "External id": 231049,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055236.935, "dur": 0.320, + "args": { + "External id": 231050,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055238.109, "dur": 0.263, + "args": { + "External id": 231051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055239.132, "dur": 0.314, + "args": { + "External id": 231052,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055242.254, "dur": 0.364, + "args": { + "External id": 231053,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055243.347, "dur": 0.369, + "args": { + "External id": 231054,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055244.730, "dur": 0.321, + "args": { + "External id": 231055,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055245.950, "dur": 0.210, + "args": { + "External id": 231056,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055246.933, "dur": 0.205, + "args": { + "External id": 231057,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055247.922, "dur": 0.210, + "args": { + "External id": 231058,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055248.883, "dur": 0.204, + "args": { + "External id": 231059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055249.835, "dur": 0.210, + "args": { + "External id": 231060,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055252.824, "dur": 0.347, + "args": { + "External id": 231061,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055253.916, "dur": 0.350, + "args": { + "External id": 231062,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055255.005, "dur": 0.204, + "args": { + "External id": 231063,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055255.955, "dur": 0.207, + "args": { + "External id": 231064,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055256.899, "dur": 0.206, + "args": { + "External id": 231065,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055257.872, "dur": 0.211, + "args": { + "External id": 231066,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055259.036, "dur": 0.336, + "args": { + "External id": 231067,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055260.137, "dur": 0.342, + "args": { + "External id": 231068,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055263.122, "dur": 0.221, + "args": { + "External id": 231069,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055264.088, "dur": 0.361, + "args": { + "External id": 231070,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055265.185, "dur": 0.202, + "args": { + "External id": 231071,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055266.108, "dur": 0.319, + "args": { + "External id": 231072,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055267.241, "dur": 0.206, + "args": { + "External id": 231073,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055268.206, "dur": 0.212, + "args": { + "External id": 231074,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055269.149, "dur": 0.205, + "args": { + "External id": 231075,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055270.270, "dur": 0.207, + "args": { + "External id": 231076,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055272.836, "dur": 0.204, + "args": { + "External id": 231077,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055273.794, "dur": 0.212, + "args": { + "External id": 231078,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055274.932, "dur": 0.240, + "args": { + "External id": 231079,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055275.966, "dur": 0.209, + "args": { + "External id": 231080,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055276.952, "dur": 0.202, + "args": { + "External id": 231081,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055277.885, "dur": 0.209, + "args": { + "External id": 231082,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055278.840, "dur": 0.202, + "args": { + "External id": 231083,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055279.758, "dur": 0.213, + "args": { + "External id": 231084,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055282.206, "dur": 0.207, + "args": { + "External id": 231085,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055283.148, "dur": 0.208, + "args": { + "External id": 231086,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055284.107, "dur": 0.223, + "args": { + "External id": 231087,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055285.306, "dur": 0.207, + "args": { + "External id": 231088,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055286.285, "dur": 0.205, + "args": { + "External id": 231089,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055287.321, "dur": 0.207, + "args": { + "External id": 231090,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055289.183, "dur": 0.212, + "args": { + "External id": 231091,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055290.186, "dur": 0.213, + "args": { + "External id": 231092,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055292.953, "dur": 0.201, + "args": { + "External id": 231093,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055293.904, "dur": 0.221, + "args": { + "External id": 231094,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055294.879, "dur": 0.202, + "args": { + "External id": 231095,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055296.016, "dur": 0.224, + "args": { + "External id": 231096,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055297.078, "dur": 0.203, + "args": { + "External id": 231097,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055298.012, "dur": 0.215, + "args": { + "External id": 231098,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055302.504, "dur": 0.252, + "args": { + "External id": 231099,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055303.531, "dur": 0.199, + "args": { + "External id": 231100,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055306.531, "dur": 0.212, + "args": { + "External id": 231101,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055307.607, "dur": 0.205, + "args": { + "External id": 231102,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055308.577, "dur": 0.207, + "args": { + "External id": 231103,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055309.687, "dur": 0.203, + "args": { + "External id": 231104,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055310.722, "dur": 0.244, + "args": { + "External id": 231105,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055311.722, "dur": 0.206, + "args": { + "External id": 231106,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055313.692, "dur": 0.218, + "args": { + "External id": 231107,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055314.724, "dur": 0.204, + "args": { + "External id": 231108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055317.592, "dur": 0.207, + "args": { + "External id": 231109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055318.534, "dur": 0.230, + "args": { + "External id": 231110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055319.491, "dur": 0.207, + "args": { + "External id": 231111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055320.438, "dur": 0.201, + "args": { + "External id": 231112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055321.383, "dur": 0.208, + "args": { + "External id": 231113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055322.318, "dur": 0.203, + "args": { + "External id": 231114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055323.493, "dur": 0.210, + "args": { + "External id": 231115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055324.481, "dur": 0.309, + "args": { + "External id": 231116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055327.443, "dur": 0.206, + "args": { + "External id": 231117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055328.636, "dur": 0.220, + "args": { + "External id": 231118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055329.577, "dur": 0.260, + "args": { + "External id": 231119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055330.774, "dur": 0.226, + "args": { + "External id": 231120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055331.957, "dur": 0.210, + "args": { + "External id": 231121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055333.113, "dur": 0.203, + "args": { + "External id": 231122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055334.458, "dur": 0.232, + "args": { + "External id": 231123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055337.210, "dur": 0.206, + "args": { + "External id": 231124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055339.994, "dur": 0.207, + "args": { + "External id": 231125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055340.981, "dur": 0.204, + "args": { + "External id": 231126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055341.928, "dur": 0.208, + "args": { + "External id": 231127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055342.875, "dur": 0.205, + "args": { + "External id": 231128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055343.814, "dur": 0.207, + "args": { + "External id": 231129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055344.783, "dur": 0.203, + "args": { + "External id": 231130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055345.747, "dur": 0.209, + "args": { + "External id": 231131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055346.717, "dur": 0.208, + "args": { + "External id": 231132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055349.089, "dur": 0.212, + "args": { + "External id": 231133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055350.070, "dur": 0.203, + "args": { + "External id": 231134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055351.021, "dur": 0.207, + "args": { + "External id": 231135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055352.067, "dur": 0.207, + "args": { + "External id": 231136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055353.007, "dur": 0.209, + "args": { + "External id": 231137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055353.953, "dur": 0.203, + "args": { + "External id": 231138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055354.972, "dur": 0.210, + "args": { + "External id": 231139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055356.162, "dur": 0.226, + "args": { + "External id": 231140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055359.445, "dur": 0.348, + "args": { + "External id": 231141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055360.571, "dur": 0.334, + "args": { + "External id": 231142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055361.981, "dur": 0.324, + "args": { + "External id": 231143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055363.081, "dur": 0.216, + "args": { + "External id": 231144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055364.069, "dur": 0.206, + "args": { + "External id": 231145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055365.117, "dur": 0.208, + "args": { + "External id": 231146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055366.202, "dur": 0.313, + "args": { + "External id": 231147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055367.292, "dur": 0.211, + "args": { + "External id": 231148,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055369.885, "dur": 0.206, + "args": { + "External id": 231149,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055370.889, "dur": 0.206, + "args": { + "External id": 231150,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055371.811, "dur": 0.207, + "args": { + "External id": 231151,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055372.758, "dur": 0.220, + "args": { + "External id": 231152,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055373.945, "dur": 0.209, + "args": { + "External id": 231153,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055374.977, "dur": 0.208, + "args": { + "External id": 231154,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055375.901, "dur": 0.210, + "args": { + "External id": 231155,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055376.876, "dur": 0.206, + "args": { + "External id": 231156,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055379.713, "dur": 0.212, + "args": { + "External id": 231157,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055380.682, "dur": 0.204, + "args": { + "External id": 231158,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055381.642, "dur": 0.212, + "args": { + "External id": 231159,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055382.612, "dur": 0.205, + "args": { + "External id": 231160,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055383.670, "dur": 0.210, + "args": { + "External id": 231161,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055384.694, "dur": 0.203, + "args": { + "External id": 231162,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055386.007, "dur": 0.217, + "args": { + "External id": 231163,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055387.025, "dur": 0.202, + "args": { + "External id": 231164,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055389.589, "dur": 0.211, + "args": { + "External id": 231165,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055390.755, "dur": 0.225, + "args": { + "External id": 231166,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055391.707, "dur": 0.211, + "args": { + "External id": 231167,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055392.851, "dur": 0.204, + "args": { + "External id": 231168,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055393.776, "dur": 0.213, + "args": { + "External id": 231169,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055394.728, "dur": 0.204, + "args": { + "External id": 231170,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055395.882, "dur": 0.205, + "args": { + "External id": 231171,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055396.828, "dur": 0.202, + "args": { + "External id": 231172,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055399.613, "dur": 0.210, + "args": { + "External id": 231173,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055400.585, "dur": 0.378, + "args": { + "External id": 231174,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055401.684, "dur": 0.398, + "args": { + "External id": 231175,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055402.849, "dur": 0.337, + "args": { + "External id": 231176,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055404.062, "dur": 0.210, + "args": { + "External id": 231177,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055405.031, "dur": 0.316, + "args": { + "External id": 231178,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055406.066, "dur": 0.213, + "args": { + "External id": 231179,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055407.009, "dur": 0.229, + "args": { + "External id": 231180,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055409.835, "dur": 0.231, + "args": { + "External id": 231181,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055410.828, "dur": 0.332, + "args": { + "External id": 231182,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055411.878, "dur": 0.331, + "args": { + "External id": 231183,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055412.965, "dur": 0.205, + "args": { + "External id": 231184,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055413.886, "dur": 0.353, + "args": { + "External id": 231185,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055414.973, "dur": 0.335, + "args": { + "External id": 231186,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055416.405, "dur": 0.346, + "args": { + "External id": 231187,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055417.554, "dur": 0.350, + "args": { + "External id": 231188,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055420.584, "dur": 0.331, + "args": { + "External id": 231189,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055421.654, "dur": 0.214, + "args": { + "External id": 231190,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055422.587, "dur": 0.208, + "args": { + "External id": 231191,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055423.533, "dur": 0.205, + "args": { + "External id": 231192,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055424.485, "dur": 0.207, + "args": { + "External id": 231193,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055425.442, "dur": 0.201, + "args": { + "External id": 231194,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055426.378, "dur": 0.208, + "args": { + "External id": 231195,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055427.318, "dur": 0.204, + "args": { + "External id": 231196,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055430.189, "dur": 0.206, + "args": { + "External id": 231197,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055431.144, "dur": 0.202, + "args": { + "External id": 231198,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055432.072, "dur": 0.211, + "args": { + "External id": 231199,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055433.018, "dur": 0.206, + "args": { + "External id": 231200,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055433.942, "dur": 0.205, + "args": { + "External id": 231201,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055434.950, "dur": 0.204, + "args": { + "External id": 231202,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055435.887, "dur": 0.213, + "args": { + "External id": 231203,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055436.838, "dur": 0.205, + "args": { + "External id": 231204,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055439.505, "dur": 0.206, + "args": { + "External id": 231205,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055440.469, "dur": 0.208, + "args": { + "External id": 231206,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055441.404, "dur": 0.220, + "args": { + "External id": 231207,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055442.507, "dur": 0.350, + "args": { + "External id": 231208,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055443.606, "dur": 0.218, + "args": { + "External id": 231209,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055444.706, "dur": 0.204, + "args": { + "External id": 231210,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055445.630, "dur": 0.208, + "args": { + "External id": 231211,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055446.566, "dur": 0.204, + "args": { + "External id": 231212,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055449.234, "dur": 0.375, + "args": { + "External id": 231213,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055450.338, "dur": 0.338, + "args": { + "External id": 231214,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055451.674, "dur": 0.216, + "args": { + "External id": 231215,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055452.704, "dur": 0.306, + "args": { + "External id": 231216,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055453.826, "dur": 0.224, + "args": { + "External id": 231217,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055454.790, "dur": 0.339, + "args": { + "External id": 231218,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055455.851, "dur": 0.209, + "args": { + "External id": 231219,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055456.801, "dur": 0.205, + "args": { + "External id": 231220,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055459.587, "dur": 0.212, + "args": { + "External id": 231221,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055460.586, "dur": 0.203, + "args": { + "External id": 231222,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055461.562, "dur": 0.209, + "args": { + "External id": 231223,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055462.559, "dur": 0.207, + "args": { + "External id": 231224,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055463.503, "dur": 0.207, + "args": { + "External id": 231225,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055464.495, "dur": 0.207, + "args": { + "External id": 231226,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055465.442, "dur": 0.211, + "args": { + "External id": 231227,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055466.413, "dur": 0.205, + "args": { + "External id": 231228,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055468.911, "dur": 0.211, + "args": { + "External id": 231229,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055469.898, "dur": 0.202, + "args": { + "External id": 231230,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055470.883, "dur": 0.206, + "args": { + "External id": 231231,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055471.819, "dur": 0.203, + "args": { + "External id": 231232,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055472.742, "dur": 0.219, + "args": { + "External id": 231233,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055473.689, "dur": 0.202, + "args": { + "External id": 231234,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055474.619, "dur": 0.223, + "args": { + "External id": 231235,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055475.612, "dur": 0.204, + "args": { + "External id": 231236,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055479.251, "dur": 0.229, + "args": { + "External id": 231237,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055480.284, "dur": 0.202, + "args": { + "External id": 231238,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055481.262, "dur": 0.206, + "args": { + "External id": 231239,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055482.200, "dur": 0.202, + "args": { + "External id": 231240,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055483.121, "dur": 0.205, + "args": { + "External id": 231241,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055484.055, "dur": 0.201, + "args": { + "External id": 231242,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055485.025, "dur": 0.210, + "args": { + "External id": 231243,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055485.977, "dur": 0.204, + "args": { + "External id": 231244,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055488.565, "dur": 0.221, + "args": { + "External id": 231245,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055489.553, "dur": 0.201, + "args": { + "External id": 231246,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055490.856, "dur": 0.210, + "args": { + "External id": 231247,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055491.924, "dur": 0.211, + "args": { + "External id": 231248,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055492.957, "dur": 0.213, + "args": { + "External id": 231249,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055494.116, "dur": 0.201, + "args": { + "External id": 231250,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055495.041, "dur": 0.205, + "args": { + "External id": 231251,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055496.173, "dur": 0.202, + "args": { + "External id": 231252,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055498.907, "dur": 0.213, + "args": { + "External id": 231253,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055500.074, "dur": 0.202, + "args": { + "External id": 231254,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055501.006, "dur": 0.224, + "args": { + "External id": 231255,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055501.985, "dur": 0.202, + "args": { + "External id": 231256,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055502.939, "dur": 0.221, + "args": { + "External id": 231257,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055503.890, "dur": 0.201, + "args": { + "External id": 231258,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055504.947, "dur": 0.267, + "args": { + "External id": 231259,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055505.950, "dur": 0.206, + "args": { + "External id": 231260,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055508.586, "dur": 0.359, + "args": { + "External id": 231261,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055509.683, "dur": 0.236, + "args": { + "External id": 231262,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055510.636, "dur": 0.209, + "args": { + "External id": 231263,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055511.573, "dur": 0.330, + "args": { + "External id": 231264,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055512.649, "dur": 0.391, + "args": { + "External id": 231265,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055513.890, "dur": 0.248, + "args": { + "External id": 231266,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055514.920, "dur": 0.346, + "args": { + "External id": 231267,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055515.994, "dur": 0.376, + "args": { + "External id": 231268,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055518.966, "dur": 0.339, + "args": { + "External id": 231269,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055520.058, "dur": 0.383, + "args": { + "External id": 231270,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055521.165, "dur": 0.372, + "args": { + "External id": 231271,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055522.291, "dur": 0.339, + "args": { + "External id": 231272,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055523.408, "dur": 0.205, + "args": { + "External id": 231273,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055524.470, "dur": 0.338, + "args": { + "External id": 231274,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055525.689, "dur": 0.312, + "args": { + "External id": 231275,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055526.744, "dur": 0.318, + "args": { + "External id": 231276,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055529.372, "dur": 0.339, + "args": { + "External id": 231277,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055530.446, "dur": 0.311, + "args": { + "External id": 231278,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055531.525, "dur": 0.349, + "args": { + "External id": 231279,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055532.703, "dur": 0.381, + "args": { + "External id": 231280,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055533.827, "dur": 0.457, + "args": { + "External id": 231281,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055535.012, "dur": 0.231, + "args": { + "External id": 231282,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055536.365, "dur": 0.234, + "args": { + "External id": 231283,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055537.381, "dur": 0.224, + "args": { + "External id": 231284,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055540.208, "dur": 0.222, + "args": { + "External id": 231285,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055541.170, "dur": 0.380, + "args": { + "External id": 231286,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055542.310, "dur": 0.338, + "args": { + "External id": 231287,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055543.383, "dur": 0.329, + "args": { + "External id": 231288,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055544.439, "dur": 0.206, + "args": { + "External id": 231289,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055545.409, "dur": 0.309, + "args": { + "External id": 231290,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055546.453, "dur": 0.210, + "args": { + "External id": 231291,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055547.408, "dur": 0.341, + "args": { + "External id": 231292,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055550.317, "dur": 0.339, + "args": { + "External id": 231293,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055551.582, "dur": 0.292, + "args": { + "External id": 231294,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055552.600, "dur": 0.206, + "args": { + "External id": 231295,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055553.680, "dur": 0.201, + "args": { + "External id": 231296,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055554.614, "dur": 0.205, + "args": { + "External id": 231297,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055555.550, "dur": 0.204, + "args": { + "External id": 231298,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055556.493, "dur": 0.214, + "args": { + "External id": 231299,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055557.462, "dur": 0.202, + "args": { + "External id": 231300,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055560.457, "dur": 0.212, + "args": { + "External id": 231301,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055561.412, "dur": 0.205, + "args": { + "External id": 231302,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055562.347, "dur": 0.210, + "args": { + "External id": 231303,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055563.304, "dur": 0.201, + "args": { + "External id": 231304,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098055564.232, "dur": 0.208, + "args": { + "External id": 231305,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070552, "tid": 2070552, + "ts": 5327098055657.075, "dur": 1503.593, + "args": { + "External id": 231306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070552, "tid": 2070552, + "ts": 5327098056100.832, "dur": 985.386, + "args": { + "External id": 231307,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056109.924, "dur": 7.551, + "args": { + "External id": 231308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056113.822, "dur": 3.006, + "args": { + "External id": 231309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056118.017, "dur": 2.567, + "args": { + "External id": 231310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056119.108, "dur": 1.338, + "args": { + "External id": 231311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056120.965, "dur": 2.580, + "args": { + "External id": 231312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056122.904, "dur": 0.579, + "args": { + "External id": 231313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056125.812, "dur": 1.324, + "args": { + "External id": 231314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056126.267, "dur": 0.795, + "args": { + "External id": 231315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056127.434, "dur": 4.474, + "args": { + "External id": 231316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056131.143, "dur": 0.673, + "args": { + "External id": 231317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056132.203, "dur": 1.104, + "args": { + "External id": 231318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056132.622, "dur": 0.620, + "args": { + "External id": 231319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056133.595, "dur": 1.087, + "args": { + "External id": 231320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056133.989, "dur": 0.622, + "args": { + "External id": 231321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056135.122, "dur": 2.850, + "args": { + "External id": 231322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056137.252, "dur": 0.657, + "args": { + "External id": 231323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056138.400, "dur": 1.150, + "args": { + "External id": 231324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056138.969, "dur": 0.520, + "args": { + "External id": 231325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056139.786, "dur": 1.989, + "args": { + "External id": 231326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056140.163, "dur": 1.519, + "args": { + "External id": 231327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056142.009, "dur": 2.317, + "args": { + "External id": 231328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056143.665, "dur": 0.586, + "args": { + "External id": 231329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056146.481, "dur": 1.452, + "args": { + "External id": 231330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056147.014, "dur": 0.845, + "args": { + "External id": 231331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056148.183, "dur": 4.319, + "args": { + "External id": 231332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056151.649, "dur": 0.778, + "args": { + "External id": 231333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056152.741, "dur": 1.651, + "args": { + "External id": 231334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056153.509, "dur": 0.809, + "args": { + "External id": 231335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056154.622, "dur": 1.733, + "args": { + "External id": 231336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056155.008, "dur": 1.274, + "args": { + "External id": 231337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056156.855, "dur": 2.839, + "args": { + "External id": 231338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056159.023, "dur": 0.609, + "args": { + "External id": 231339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056160.199, "dur": 1.017, + "args": { + "External id": 231340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056160.601, "dur": 0.550, + "args": { + "External id": 231341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056161.467, "dur": 1.921, + "args": { + "External id": 231342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056161.985, "dur": 1.324, + "args": { + "External id": 231343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056163.616, "dur": 2.238, + "args": { + "External id": 231344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056165.294, "dur": 0.495, + "args": { + "External id": 231345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056168.023, "dur": 1.532, + "args": { + "External id": 231346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056168.654, "dur": 0.829, + "args": { + "External id": 231347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056170.017, "dur": 3.669, + "args": { + "External id": 231348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056172.865, "dur": 0.759, + "args": { + "External id": 231349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056174.103, "dur": 0.952, + "args": { + "External id": 231350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056174.524, "dur": 0.468, + "args": { + "External id": 231351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056175.634, "dur": 1.236, + "args": { + "External id": 231352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056176.040, "dur": 0.760, + "args": { + "External id": 231353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056177.115, "dur": 2.817, + "args": { + "External id": 231354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056179.297, "dur": 0.572, + "args": { + "External id": 231355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056180.155, "dur": 1.252, + "args": { + "External id": 231356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056180.681, "dur": 0.663, + "args": { + "External id": 231357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056181.633, "dur": 2.019, + "args": { + "External id": 231358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056182.099, "dur": 1.475, + "args": { + "External id": 231359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056184.096, "dur": 2.064, + "args": { + "External id": 231360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056185.394, "dur": 0.702, + "args": { + "External id": 231361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056188.523, "dur": 1.239, + "args": { + "External id": 231362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056188.888, "dur": 0.805, + "args": { + "External id": 231363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056189.989, "dur": 2.878, + "args": { + "External id": 231364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056192.125, "dur": 0.671, + "args": { + "External id": 231365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056193.119, "dur": 1.097, + "args": { + "External id": 231366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056193.553, "dur": 0.593, + "args": { + "External id": 231367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056194.528, "dur": 1.218, + "args": { + "External id": 231368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056194.949, "dur": 0.725, + "args": { + "External id": 231369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056196.152, "dur": 3.055, + "args": { + "External id": 231370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056198.427, "dur": 0.711, + "args": { + "External id": 231371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056199.654, "dur": 0.977, + "args": { + "External id": 231372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056200.088, "dur": 0.476, + "args": { + "External id": 231373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056201.319, "dur": 2.517, + "args": { + "External id": 231374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056201.724, "dur": 2.035, + "args": { + "External id": 231375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056204.061, "dur": 2.204, + "args": { + "External id": 231376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056205.594, "dur": 0.599, + "args": { + "External id": 231377,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056208.496, "dur": 1.639, + "args": { + "External id": 231378,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056209.020, "dur": 1.045, + "args": { + "External id": 231379,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056210.561, "dur": 3.389, + "args": { + "External id": 231380,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056213.158, "dur": 0.716, + "args": { + "External id": 231381,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056214.501, "dur": 1.219, + "args": { + "External id": 231382,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056215.040, "dur": 0.616, + "args": { + "External id": 231383,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056216.113, "dur": 1.265, + "args": { + "External id": 231384,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056216.503, "dur": 0.808, + "args": { + "External id": 231385,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056217.604, "dur": 2.936, + "args": { + "External id": 231386,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056219.751, "dur": 0.723, + "args": { + "External id": 231387,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056220.895, "dur": 1.185, + "args": { + "External id": 231388,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056221.255, "dur": 0.757, + "args": { + "External id": 231389,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056222.309, "dur": 2.475, + "args": { + "External id": 231390,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056223.271, "dur": 1.347, + "args": { + "External id": 231391,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056225.051, "dur": 1.620, + "args": { + "External id": 231392,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056226.151, "dur": 0.447, + "args": { + "External id": 231393,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056228.909, "dur": 1.724, + "args": { + "External id": 231394,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056229.635, "dur": 0.923, + "args": { + "External id": 231395,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056230.885, "dur": 2.790, + "args": { + "External id": 231396,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056232.884, "dur": 0.726, + "args": { + "External id": 231397,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056234.112, "dur": 1.858, + "args": { + "External id": 231398,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056235.367, "dur": 0.532, + "args": { + "External id": 231399,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056236.255, "dur": 2.463, + "args": { + "External id": 231400,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056237.975, "dur": 0.672, + "args": { + "External id": 231401,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056238.971, "dur": 2.615, + "args": { + "External id": 231402,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056241.178, "dur": 0.343, + "args": { + "External id": 231403,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056241.907, "dur": 1.449, + "args": { + "External id": 231404,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056242.748, "dur": 0.541, + "args": { + "External id": 231405,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056243.623, "dur": 2.660, + "args": { + "External id": 231406,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056244.643, "dur": 1.564, + "args": { + "External id": 231407,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056246.756, "dur": 1.648, + "args": { + "External id": 231408,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056247.815, "dur": 0.523, + "args": { + "External id": 231409,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056250.378, "dur": 1.524, + "args": { + "External id": 231410,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056251.149, "dur": 0.682, + "args": { + "External id": 231411,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056252.359, "dur": 3.293, + "args": { + "External id": 231412,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056255.094, "dur": 0.493, + "args": { + "External id": 231413,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056256.141, "dur": 1.539, + "args": { + "External id": 231414,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056256.815, "dur": 0.799, + "args": { + "External id": 231415,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056258.002, "dur": 1.893, + "args": { + "External id": 231416,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056259.171, "dur": 0.653, + "args": { + "External id": 231417,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056260.173, "dur": 2.616, + "args": { + "External id": 231418,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056261.785, "dur": 0.943, + "args": { + "External id": 231419,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056263.259, "dur": 1.982, + "args": { + "External id": 231420,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056264.388, "dur": 0.791, + "args": { + "External id": 231421,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056265.710, "dur": 2.408, + "args": { + "External id": 231422,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056266.579, "dur": 1.358, + "args": { + "External id": 231423,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056268.386, "dur": 22.787, + "args": { + "External id": 231424,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056290.035, "dur": 1.050, + "args": { + "External id": 236545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056293.208, "dur": 2.530, + "args": { + "External id": 236546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056293.931, "dur": 1.630, + "args": { + "External id": 236547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056296.012, "dur": 1.677, + "args": { + "External id": 236548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056297.117, "dur": 0.503, + "args": { + "External id": 236549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056298.046, "dur": 1.585, + "args": { + "External id": 236550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056298.752, "dur": 0.809, + "args": { + "External id": 236551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056299.925, "dur": 3.097, + "args": { + "External id": 236552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056302.327, "dur": 0.623, + "args": { + "External id": 236553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056303.322, "dur": 1.673, + "args": { + "External id": 236554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056304.422, "dur": 0.506, + "args": { + "External id": 236555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056305.259, "dur": 7.302, + "args": { + "External id": 236556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056310.471, "dur": 2.010, + "args": { + "External id": 236557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056312.849, "dur": 1.299, + "args": { + "External id": 236558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056313.639, "dur": 0.438, + "args": { + "External id": 236559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056314.465, "dur": 2.201, + "args": { + "External id": 236560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056315.939, "dur": 0.655, + "args": { + "External id": 236561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056318.568, "dur": 2.506, + "args": { + "External id": 236562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056320.431, "dur": 0.584, + "args": { + "External id": 236563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056321.358, "dur": 1.884, + "args": { + "External id": 236564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056322.403, "dur": 0.775, + "args": { + "External id": 236565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056323.491, "dur": 1.600, + "args": { + "External id": 236566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056324.204, "dur": 0.820, + "args": { + "External id": 236567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056325.342, "dur": 3.209, + "args": { + "External id": 236568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056327.764, "dur": 0.725, + "args": { + "External id": 236569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056328.810, "dur": 1.582, + "args": { + "External id": 236570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056329.520, "dur": 0.809, + "args": { + "External id": 236571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056330.645, "dur": 2.879, + "args": { + "External id": 236572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056331.743, "dur": 1.521, + "args": { + "External id": 236573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056333.819, "dur": 1.558, + "args": { + "External id": 236574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056334.748, "dur": 0.570, + "args": { + "External id": 236575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056335.762, "dur": 2.177, + "args": { + "External id": 236576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056336.904, "dur": 0.964, + "args": { + "External id": 236577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056340.245, "dur": 2.905, + "args": { + "External id": 236578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056342.345, "dur": 0.740, + "args": { + "External id": 236579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056343.423, "dur": 1.590, + "args": { + "External id": 236580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056344.372, "dur": 0.575, + "args": { + "External id": 236581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056345.267, "dur": 1.680, + "args": { + "External id": 236582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056346.213, "dur": 0.664, + "args": { + "External id": 236583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056347.201, "dur": 3.358, + "args": { + "External id": 236584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056349.618, "dur": 0.850, + "args": { + "External id": 236585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056350.904, "dur": 1.408, + "args": { + "External id": 236586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056351.688, "dur": 0.555, + "args": { + "External id": 236587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056352.614, "dur": 2.470, + "args": { + "External id": 236588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056353.755, "dur": 1.254, + "args": { + "External id": 236589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056355.338, "dur": 1.325, + "args": { + "External id": 236590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056356.019, "dur": 0.576, + "args": { + "External id": 236591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056357.070, "dur": 1.898, + "args": { + "External id": 236592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056358.251, "dur": 0.646, + "args": { + "External id": 236593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056361.032, "dur": 2.737, + "args": { + "External id": 236594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056363.139, "dur": 0.563, + "args": { + "External id": 236595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056364.053, "dur": 2.523, + "args": { + "External id": 236596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056365.788, "dur": 0.720, + "args": { + "External id": 236597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056366.832, "dur": 1.366, + "args": { + "External id": 236598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056367.523, "dur": 0.605, + "args": { + "External id": 236599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056368.472, "dur": 3.540, + "args": { + "External id": 236600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056371.381, "dur": 0.568, + "args": { + "External id": 236601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056372.394, "dur": 1.385, + "args": { + "External id": 236602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056373.030, "dur": 0.684, + "args": { + "External id": 236603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056374.092, "dur": 2.614, + "args": { + "External id": 236604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056375.152, "dur": 1.476, + "args": { + "External id": 236605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056376.975, "dur": 1.795, + "args": { + "External id": 236606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056378.060, "dur": 0.642, + "args": { + "External id": 236607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056379.035, "dur": 2.005, + "args": { + "External id": 236608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056380.220, "dur": 0.749, + "args": { + "External id": 236609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056382.781, "dur": 2.712, + "args": { + "External id": 236610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056385.004, "dur": 0.420, + "args": { + "External id": 236611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056385.786, "dur": 1.533, + "args": { + "External id": 236612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056386.761, "dur": 0.487, + "args": { + "External id": 236613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056387.661, "dur": 1.216, + "args": { + "External id": 236614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056388.258, "dur": 0.547, + "args": { + "External id": 236615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056389.145, "dur": 2.852, + "args": { + "External id": 236616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056391.319, "dur": 0.608, + "args": { + "External id": 236617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056392.280, "dur": 1.751, + "args": { + "External id": 236618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056393.187, "dur": 0.776, + "args": { + "External id": 236619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056394.300, "dur": 2.539, + "args": { + "External id": 236620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056395.223, "dur": 1.450, + "args": { + "External id": 236621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056397.121, "dur": 1.839, + "args": { + "External id": 236622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056398.286, "dur": 0.604, + "args": { + "External id": 236623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056399.435, "dur": 1.933, + "args": { + "External id": 236624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056400.558, "dur": 0.742, + "args": { + "External id": 236625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056403.194, "dur": 3.333, + "args": { + "External id": 236626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056405.933, "dur": 0.526, + "args": { + "External id": 236627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056406.800, "dur": 1.615, + "args": { + "External id": 236628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056407.837, "dur": 0.510, + "args": { + "External id": 236629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056408.666, "dur": 1.692, + "args": { + "External id": 236630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056409.392, "dur": 0.895, + "args": { + "External id": 236631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056410.610, "dur": 3.166, + "args": { + "External id": 236632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056413.243, "dur": 0.470, + "args": { + "External id": 236633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056414.075, "dur": 1.318, + "args": { + "External id": 236634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056414.823, "dur": 0.506, + "args": { + "External id": 236635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056415.650, "dur": 2.479, + "args": { + "External id": 236636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056416.618, "dur": 1.440, + "args": { + "External id": 236637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056419.586, "dur": 1.413, + "args": { + "External id": 236638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056420.495, "dur": 0.437, + "args": { + "External id": 236639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056421.273, "dur": 2.472, + "args": { + "External id": 236640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056422.447, "dur": 1.225, + "args": { + "External id": 236641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056425.857, "dur": 1.042, + "args": { + "External id": 236642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056426.599, "dur": 0.233, + "args": { + "External id": 236643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056427.293, "dur": 1.823, + "args": { + "External id": 236644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056428.178, "dur": 0.863, + "args": { + "External id": 236645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056429.365, "dur": 2.680, + "args": { + "External id": 236646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056431.481, "dur": 0.497, + "args": { + "External id": 236647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056432.332, "dur": 1.289, + "args": { + "External id": 236648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056432.981, "dur": 0.575, + "args": { + "External id": 236649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056433.872, "dur": 1.577, + "args": { + "External id": 236650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056434.747, "dur": 0.634, + "args": { + "External id": 236651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056435.718, "dur": 2.047, + "args": { + "External id": 236652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056437.215, "dur": 0.482, + "args": { + "External id": 236653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056438.015, "dur": 1.370, + "args": { + "External id": 236654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056438.756, "dur": 0.565, + "args": { + "External id": 236655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056439.638, "dur": 2.002, + "args": { + "External id": 236656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056440.449, "dur": 1.117, + "args": { + "External id": 236657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056443.585, "dur": 1.644, + "args": { + "External id": 236658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056444.461, "dur": 0.701, + "args": { + "External id": 236659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056445.517, "dur": 1.498, + "args": { + "External id": 236660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056446.190, "dur": 0.754, + "args": { + "External id": 236661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056447.307, "dur": 2.824, + "args": { + "External id": 236662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056449.628, "dur": 0.436, + "args": { + "External id": 236663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056450.421, "dur": 1.603, + "args": { + "External id": 236664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056451.482, "dur": 0.481, + "args": { + "External id": 236665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056452.364, "dur": 1.370, + "args": { + "External id": 236666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056453.102, "dur": 0.562, + "args": { + "External id": 236667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056454.057, "dur": 2.554, + "args": { + "External id": 236668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056455.977, "dur": 0.567, + "args": { + "External id": 236669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056456.878, "dur": 1.337, + "args": { + "External id": 236670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056457.653, "dur": 0.494, + "args": { + "External id": 236671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056458.518, "dur": 2.364, + "args": { + "External id": 236672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056459.270, "dur": 1.531, + "args": { + "External id": 236673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056462.803, "dur": 1.427, + "args": { + "External id": 236674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056463.624, "dur": 0.541, + "args": { + "External id": 236675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056464.521, "dur": 1.849, + "args": { + "External id": 236676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056465.344, "dur": 0.959, + "args": { + "External id": 236677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056466.621, "dur": 2.807, + "args": { + "External id": 236678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056468.616, "dur": 0.744, + "args": { + "External id": 236679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056469.703, "dur": 1.570, + "args": { + "External id": 236680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056470.462, "dur": 0.741, + "args": { + "External id": 236681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056471.534, "dur": 2.016, + "args": { + "External id": 236682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056472.671, "dur": 0.812, + "args": { + "External id": 236683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056473.829, "dur": 2.700, + "args": { + "External id": 236684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056475.735, "dur": 0.721, + "args": { + "External id": 236685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056476.807, "dur": 1.542, + "args": { + "External id": 236686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056477.694, "dur": 0.593, + "args": { + "External id": 236687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056478.601, "dur": 2.507, + "args": { + "External id": 236688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056479.383, "dur": 1.655, + "args": { + "External id": 236689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056482.754, "dur": 1.570, + "args": { + "External id": 236690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056483.485, "dur": 0.771, + "args": { + "External id": 236691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056484.576, "dur": 1.566, + "args": { + "External id": 236692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056485.149, "dur": 0.924, + "args": { + "External id": 236693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056486.442, "dur": 2.968, + "args": { + "External id": 236694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056488.738, "dur": 0.604, + "args": { + "External id": 236695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056489.709, "dur": 1.278, + "args": { + "External id": 236696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056490.477, "dur": 0.438, + "args": { + "External id": 236697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056491.241, "dur": 1.379, + "args": { + "External id": 236698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056491.969, "dur": 0.582, + "args": { + "External id": 236699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056492.967, "dur": 1.861, + "args": { + "External id": 236700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056494.182, "dur": 0.579, + "args": { + "External id": 236701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056495.114, "dur": 1.743, + "args": { + "External id": 236702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056495.905, "dur": 0.883, + "args": { + "External id": 236703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056497.208, "dur": 1.894, + "args": { + "External id": 236704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056497.843, "dur": 1.165, + "args": { + "External id": 236705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056501.029, "dur": 1.418, + "args": { + "External id": 236706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056501.649, "dur": 0.731, + "args": { + "External id": 236707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056502.703, "dur": 1.587, + "args": { + "External id": 236708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056503.338, "dur": 0.885, + "args": { + "External id": 236709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056504.582, "dur": 2.499, + "args": { + "External id": 236710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056506.250, "dur": 0.763, + "args": { + "External id": 236711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056507.533, "dur": 1.514, + "args": { + "External id": 236712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056508.448, "dur": 0.535, + "args": { + "External id": 236713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056509.401, "dur": 1.473, + "args": { + "External id": 236714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056510.262, "dur": 0.545, + "args": { + "External id": 236715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056511.175, "dur": 2.977, + "args": { + "External id": 236716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056513.200, "dur": 0.886, + "args": { + "External id": 236717,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056514.489, "dur": 1.404, + "args": { + "External id": 236718,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056515.298, "dur": 0.528, + "args": { + "External id": 236719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056516.147, "dur": 2.546, + "args": { + "External id": 236720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056516.976, "dur": 1.635, + "args": { + "External id": 236721,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056520.496, "dur": 1.398, + "args": { + "External id": 236722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056521.310, "dur": 0.516, + "args": { + "External id": 236723,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056522.146, "dur": 1.574, + "args": { + "External id": 236724,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056522.993, "dur": 0.657, + "args": { + "External id": 236725,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056523.989, "dur": 2.494, + "args": { + "External id": 236726,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056525.626, "dur": 0.790, + "args": { + "External id": 236727,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056526.789, "dur": 1.684, + "args": { + "External id": 236728,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056527.872, "dur": 0.534, + "args": { + "External id": 236729,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056528.725, "dur": 1.808, + "args": { + "External id": 236730,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056529.653, "dur": 0.810, + "args": { + "External id": 236731,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056530.892, "dur": 2.348, + "args": { + "External id": 236732,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056532.575, "dur": 0.598, + "args": { + "External id": 236733,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056533.516, "dur": 1.407, + "args": { + "External id": 236734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056534.073, "dur": 0.781, + "args": { + "External id": 236735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056535.232, "dur": 2.183, + "args": { + "External id": 236736,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056535.986, "dur": 1.355, + "args": { + "External id": 236737,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056539.880, "dur": 1.310, + "args": { + "External id": 236738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056540.525, "dur": 0.598, + "args": { + "External id": 236739,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056541.480, "dur": 1.613, + "args": { + "External id": 236740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056542.085, "dur": 0.936, + "args": { + "External id": 236741,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056543.360, "dur": 2.275, + "args": { + "External id": 236742,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056544.950, "dur": 0.614, + "args": { + "External id": 236743,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056545.985, "dur": 1.362, + "args": { + "External id": 236744,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056546.725, "dur": 0.555, + "args": { + "External id": 236745,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056547.597, "dur": 1.896, + "args": { + "External id": 236746,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056548.624, "dur": 0.801, + "args": { + "External id": 236747,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056549.801, "dur": 2.419, + "args": { + "External id": 236748,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056551.511, "dur": 0.637, + "args": { + "External id": 236749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056552.664, "dur": 1.542, + "args": { + "External id": 236750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056553.479, "dur": 0.659, + "args": { + "External id": 236751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056554.688, "dur": 2.096, + "args": { + "External id": 236752,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056555.471, "dur": 1.244, + "args": { + "External id": 236753,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056559.103, "dur": 1.323, + "args": { + "External id": 236754,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056559.897, "dur": 0.465, + "args": { + "External id": 236755,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056560.706, "dur": 1.924, + "args": { + "External id": 236756,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056561.667, "dur": 0.893, + "args": { + "External id": 236757,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056563.017, "dur": 2.557, + "args": { + "External id": 236758,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056564.954, "dur": 0.553, + "args": { + "External id": 236759,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056565.849, "dur": 1.207, + "args": { + "External id": 236760,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056566.457, "dur": 0.533, + "args": { + "External id": 236761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056567.357, "dur": 1.415, + "args": { + "External id": 236762,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056568.046, "dur": 0.658, + "args": { + "External id": 236763,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056569.024, "dur": 2.224, + "args": { + "External id": 236764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056570.748, "dur": 0.433, + "args": { + "External id": 236765,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056571.506, "dur": 1.699, + "args": { + "External id": 236766,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056572.559, "dur": 0.581, + "args": { + "External id": 236767,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056573.463, "dur": 2.526, + "args": { + "External id": 236768,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056574.490, "dur": 1.232, + "args": { + "External id": 236769,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056578.270, "dur": 1.464, + "args": { + "External id": 236770,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056579.043, "dur": 0.622, + "args": { + "External id": 236771,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056579.994, "dur": 1.537, + "args": { + "External id": 236772,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056580.794, "dur": 0.672, + "args": { + "External id": 236773,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056581.783, "dur": 2.343, + "args": { + "External id": 236774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056583.516, "dur": 0.546, + "args": { + "External id": 236775,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056584.409, "dur": 1.510, + "args": { + "External id": 236776,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056585.207, "dur": 0.645, + "args": { + "External id": 236777,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056586.183, "dur": 1.639, + "args": { + "External id": 236778,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056587.016, "dur": 0.738, + "args": { + "External id": 236779,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056588.177, "dur": 2.712, + "args": { + "External id": 236780,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056590.202, "dur": 0.618, + "args": { + "External id": 236781,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056591.285, "dur": 1.523, + "args": { + "External id": 236782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056592.228, "dur": 0.512, + "args": { + "External id": 236783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056593.054, "dur": 2.447, + "args": { + "External id": 236784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056593.877, "dur": 1.550, + "args": { + "External id": 236785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056597.312, "dur": 1.434, + "args": { + "External id": 236786,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056598.082, "dur": 0.593, + "args": { + "External id": 236787,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056598.995, "dur": 1.937, + "args": { + "External id": 236788,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056599.951, "dur": 0.911, + "args": { + "External id": 236789,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056601.193, "dur": 2.467, + "args": { + "External id": 236790,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056603.002, "dur": 0.590, + "args": { + "External id": 236791,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056603.935, "dur": 1.234, + "args": { + "External id": 236792,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056604.569, "dur": 0.533, + "args": { + "External id": 236793,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056605.471, "dur": 1.539, + "args": { + "External id": 236794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056606.346, "dur": 0.598, + "args": { + "External id": 236795,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056607.292, "dur": 2.536, + "args": { + "External id": 236796,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056608.935, "dur": 0.824, + "args": { + "External id": 236797,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056610.108, "dur": 1.320, + "args": { + "External id": 236798,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056610.694, "dur": 0.668, + "args": { + "External id": 236799,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056611.884, "dur": 2.034, + "args": { + "External id": 236800,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056612.561, "dur": 1.286, + "args": { + "External id": 236801,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056615.945, "dur": 1.419, + "args": { + "External id": 236802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056616.644, "dur": 0.653, + "args": { + "External id": 236803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056617.676, "dur": 28.095, + "args": { + "External id": 236804,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056618.514, "dur": 26.266, + "args": { + "External id": 236805,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056647.394, "dur": 4.048, + "args": { + "External id": 236806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056650.472, "dur": 0.903, + "args": { + "External id": 236807,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056651.775, "dur": 1.671, + "args": { + "External id": 236808,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056652.705, "dur": 0.670, + "args": { + "External id": 236809,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056653.712, "dur": 1.799, + "args": { + "External id": 236810,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056654.791, "dur": 0.654, + "args": { + "External id": 236811,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056655.935, "dur": 2.982, + "args": { + "External id": 236812,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056658.184, "dur": 0.668, + "args": { + "External id": 236813,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056659.184, "dur": 1.481, + "args": { + "External id": 236814,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056659.862, "dur": 0.736, + "args": { + "External id": 236815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056660.958, "dur": 2.081, + "args": { + "External id": 236816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056661.761, "dur": 1.210, + "args": { + "External id": 236817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056665.058, "dur": 1.963, + "args": { + "External id": 236818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056666.168, "dur": 0.788, + "args": { + "External id": 236819,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056667.290, "dur": 1.491, + "args": { + "External id": 236820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056667.924, "dur": 0.794, + "args": { + "External id": 236821,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056669.064, "dur": 3.173, + "args": { + "External id": 236822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056671.588, "dur": 0.581, + "args": { + "External id": 236823,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056672.529, "dur": 1.715, + "args": { + "External id": 236824,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056673.334, "dur": 0.841, + "args": { + "External id": 236825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056674.491, "dur": 1.877, + "args": { + "External id": 236826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056675.249, "dur": 1.056, + "args": { + "External id": 236827,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056676.639, "dur": 2.731, + "args": { + "External id": 236828,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056678.553, "dur": 0.750, + "args": { + "External id": 236829,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056679.621, "dur": 1.312, + "args": { + "External id": 236830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056680.210, "dur": 0.663, + "args": { + "External id": 236831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056681.195, "dur": 2.471, + "args": { + "External id": 236832,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056682.008, "dur": 1.587, + "args": { + "External id": 236833,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056685.460, "dur": 1.557, + "args": { + "External id": 236834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056686.131, "dur": 0.819, + "args": { + "External id": 236835,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056687.456, "dur": 1.760, + "args": { + "External id": 236836,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056688.207, "dur": 0.948, + "args": { + "External id": 236837,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056689.553, "dur": 3.248, + "args": { + "External id": 236838,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056691.641, "dur": 1.093, + "args": { + "External id": 236839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056693.074, "dur": 1.565, + "args": { + "External id": 236840,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056693.878, "dur": 0.693, + "args": { + "External id": 236841,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056694.889, "dur": 1.350, + "args": { + "External id": 236842,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056695.487, "dur": 0.688, + "args": { + "External id": 236843,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056696.515, "dur": 2.117, + "args": { + "External id": 236844,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056697.896, "dur": 0.667, + "args": { + "External id": 236845,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056698.973, "dur": 1.235, + "args": { + "External id": 236846,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056699.566, "dur": 0.577, + "args": { + "External id": 236847,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056700.556, "dur": 2.227, + "args": { + "External id": 236848,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056701.414, "dur": 1.101, + "args": { + "External id": 236849,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056704.695, "dur": 2.375, + "args": { + "External id": 236850,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056705.741, "dur": 1.267, + "args": { + "External id": 236851,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056707.345, "dur": 1.464, + "args": { + "External id": 236852,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056708.272, "dur": 0.470, + "args": { + "External id": 236853,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056709.096, "dur": 2.754, + "args": { + "External id": 236854,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056711.172, "dur": 0.614, + "args": { + "External id": 236855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056712.228, "dur": 1.623, + "args": { + "External id": 236856,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056713.142, "dur": 0.646, + "args": { + "External id": 236857,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056714.123, "dur": 1.488, + "args": { + "External id": 236858,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056714.779, "dur": 0.768, + "args": { + "External id": 236859,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056715.976, "dur": 2.976, + "args": { + "External id": 236860,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056717.983, "dur": 0.905, + "args": { + "External id": 236861,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056719.204, "dur": 1.551, + "args": { + "External id": 236862,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056720.148, "dur": 0.541, + "args": { + "External id": 236863,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056721.010, "dur": 1.761, + "args": { + "External id": 236864,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056721.729, "dur": 0.977, + "args": { + "External id": 236865,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056724.441, "dur": 1.348, + "args": { + "External id": 236866,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056725.166, "dur": 0.555, + "args": { + "External id": 236867,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056726.245, "dur": 2.185, + "args": { + "External id": 236868,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056727.280, "dur": 1.086, + "args": { + "External id": 236869,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056728.860, "dur": 2.491, + "args": { + "External id": 236870,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056730.650, "dur": 0.632, + "args": { + "External id": 236871,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056731.620, "dur": 1.719, + "args": { + "External id": 236872,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056732.498, "dur": 0.774, + "args": { + "External id": 236873,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056733.580, "dur": 1.581, + "args": { + "External id": 236874,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056734.227, "dur": 0.869, + "args": { + "External id": 236875,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056735.469, "dur": 2.210, + "args": { + "External id": 236876,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056737.058, "dur": 0.548, + "args": { + "External id": 236877,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056737.945, "dur": 1.450, + "args": { + "External id": 236878,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056738.507, "dur": 0.819, + "args": { + "External id": 236879,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056739.747, "dur": 2.170, + "args": { + "External id": 236880,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056740.657, "dur": 1.191, + "args": { + "External id": 236881,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056743.803, "dur": 1.363, + "args": { + "External id": 236882,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056744.472, "dur": 0.628, + "args": { + "External id": 236883,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056745.410, "dur": 1.279, + "args": { + "External id": 236884,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056746.021, "dur": 0.606, + "args": { + "External id": 236885,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056746.938, "dur": 2.802, + "args": { + "External id": 236886,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056748.953, "dur": 0.720, + "args": { + "External id": 236887,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056750.038, "dur": 1.745, + "args": { + "External id": 236888,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056750.953, "dur": 0.761, + "args": { + "External id": 236889,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056752.052, "dur": 1.270, + "args": { + "External id": 236890,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056752.621, "dur": 0.638, + "args": { + "External id": 236891,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056753.570, "dur": 2.348, + "args": { + "External id": 236892,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056755.272, "dur": 0.583, + "args": { + "External id": 236893,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056757.860, "dur": 1.519, + "args": { + "External id": 236894,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056758.668, "dur": 0.642, + "args": { + "External id": 236895,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056759.637, "dur": 2.519, + "args": { + "External id": 236896,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056760.354, "dur": 1.638, + "args": { + "External id": 236897,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056763.985, "dur": 1.695, + "args": { + "External id": 236898,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056764.887, "dur": 0.725, + "args": { + "External id": 236899,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056765.965, "dur": 1.727, + "args": { + "External id": 236900,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056766.642, "dur": 0.986, + "args": { + "External id": 236901,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056767.957, "dur": 3.081, + "args": { + "External id": 236902,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056770.313, "dur": 0.663, + "args": { + "External id": 236903,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056771.358, "dur": 1.709, + "args": { + "External id": 236904,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056772.224, "dur": 0.777, + "args": { + "External id": 236905,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056773.679, "dur": 1.498, + "args": { + "External id": 236906,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056774.331, "dur": 0.782, + "args": { + "External id": 236907,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056775.431, "dur": 3.081, + "args": { + "External id": 236908,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056777.690, "dur": 0.756, + "args": { + "External id": 236909,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056778.752, "dur": 1.759, + "args": { + "External id": 236910,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056779.507, "dur": 0.938, + "args": { + "External id": 236911,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056780.771, "dur": 2.336, + "args": { + "External id": 236912,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056781.528, "dur": 1.423, + "args": { + "External id": 236913,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056784.756, "dur": 1.239, + "args": { + "External id": 236914,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056785.410, "dur": 0.518, + "args": { + "External id": 236915,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056786.391, "dur": 1.891, + "args": { + "External id": 236916,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056787.418, "dur": 0.800, + "args": { + "External id": 236917,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056788.615, "dur": 2.921, + "args": { + "External id": 236918,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056790.982, "dur": 0.493, + "args": { + "External id": 236919,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056791.808, "dur": 1.542, + "args": { + "External id": 236920,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056792.744, "dur": 0.537, + "args": { + "External id": 236921,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056793.770, "dur": 1.559, + "args": { + "External id": 236922,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056794.416, "dur": 0.848, + "args": { + "External id": 236923,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056795.577, "dur": 2.935, + "args": { + "External id": 236924,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056797.902, "dur": 0.548, + "args": { + "External id": 236925,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056798.750, "dur": 1.494, + "args": { + "External id": 236926,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056799.392, "dur": 0.788, + "args": { + "External id": 236927,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056800.494, "dur": 2.929, + "args": { + "External id": 236928,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056801.534, "dur": 1.818, + "args": { + "External id": 236929,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056805.505, "dur": 1.576, + "args": { + "External id": 236930,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056806.249, "dur": 0.767, + "args": { + "External id": 236931,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056807.347, "dur": 1.951, + "args": { + "External id": 236932,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056808.227, "dur": 1.005, + "args": { + "External id": 236933,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056809.578, "dur": 2.960, + "args": { + "External id": 236934,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056811.938, "dur": 0.538, + "args": { + "External id": 236935,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056812.833, "dur": 1.233, + "args": { + "External id": 236936,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056813.501, "dur": 0.503, + "args": { + "External id": 236937,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056814.306, "dur": 1.514, + "args": { + "External id": 236938,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056814.987, "dur": 0.769, + "args": { + "External id": 236939,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056816.182, "dur": 2.389, + "args": { + "External id": 236940,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056817.750, "dur": 0.759, + "args": { + "External id": 236941,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056819.022, "dur": 1.518, + "args": { + "External id": 236942,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056819.849, "dur": 0.623, + "args": { + "External id": 236943,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056820.789, "dur": 2.505, + "args": { + "External id": 236944,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056821.495, "dur": 1.634, + "args": { + "External id": 236945,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056825.170, "dur": 1.628, + "args": { + "External id": 236946,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056825.947, "dur": 0.788, + "args": { + "External id": 236947,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056827.224, "dur": 1.791, + "args": { + "External id": 236948,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056827.855, "dur": 1.098, + "args": { + "External id": 236949,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056829.576, "dur": 2.530, + "args": { + "External id": 236950,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056831.491, "dur": 0.551, + "args": { + "External id": 236951,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056832.390, "dur": 1.517, + "args": { + "External id": 236952,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056833.094, "dur": 0.747, + "args": { + "External id": 236953,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056834.324, "dur": 1.432, + "args": { + "External id": 236954,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056835.085, "dur": 0.608, + "args": { + "External id": 236955,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056836.005, "dur": 2.601, + "args": { + "External id": 236956,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056838.075, "dur": 0.467, + "args": { + "External id": 236957,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056838.872, "dur": 1.526, + "args": { + "External id": 236958,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056839.623, "dur": 0.714, + "args": { + "External id": 236959,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056840.649, "dur": 2.362, + "args": { + "External id": 236960,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056841.288, "dur": 1.646, + "args": { + "External id": 236961,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056845.034, "dur": 1.226, + "args": { + "External id": 236962,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056845.678, "dur": 0.513, + "args": { + "External id": 236963,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056846.540, "dur": 2.017, + "args": { + "External id": 236964,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056847.583, "dur": 0.912, + "args": { + "External id": 236965,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056848.795, "dur": 3.298, + "args": { + "External id": 236966,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056851.487, "dur": 0.541, + "args": { + "External id": 236967,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056852.370, "dur": 1.501, + "args": { + "External id": 236968,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056853.087, "dur": 0.720, + "args": { + "External id": 236969,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056854.111, "dur": 1.575, + "args": { + "External id": 236970,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056854.884, "dur": 0.739, + "args": { + "External id": 236971,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056855.944, "dur": 3.382, + "args": { + "External id": 236972,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056858.457, "dur": 0.805, + "args": { + "External id": 236973,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056859.573, "dur": 1.414, + "args": { + "External id": 236974,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056860.342, "dur": 0.583, + "args": { + "External id": 236975,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056861.234, "dur": 2.350, + "args": { + "External id": 236976,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056861.888, "dur": 1.630, + "args": { + "External id": 236977,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056865.543, "dur": 1.616, + "args": { + "External id": 236978,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056866.318, "dur": 0.778, + "args": { + "External id": 236979,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056867.402, "dur": 1.556, + "args": { + "External id": 236980,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056868.179, "dur": 0.711, + "args": { + "External id": 236981,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056869.338, "dur": 3.787, + "args": { + "External id": 236982,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056872.330, "dur": 0.728, + "args": { + "External id": 236983,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056873.454, "dur": 1.333, + "args": { + "External id": 236984,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056874.177, "dur": 0.547, + "args": { + "External id": 236985,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056875.039, "dur": 1.464, + "args": { + "External id": 236986,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056875.698, "dur": 0.741, + "args": { + "External id": 236987,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056876.894, "dur": 2.545, + "args": { + "External id": 236988,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056878.822, "dur": 0.552, + "args": { + "External id": 236989,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056879.683, "dur": 1.711, + "args": { + "External id": 236990,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056880.517, "dur": 0.814, + "args": { + "External id": 236991,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056881.684, "dur": 2.378, + "args": { + "External id": 236992,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056882.333, "dur": 1.664, + "args": { + "External id": 236993,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056886.138, "dur": 1.367, + "args": { + "External id": 236994,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056886.893, "dur": 0.543, + "args": { + "External id": 236995,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056888.123, "dur": 1.733, + "args": { + "External id": 236996,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056888.839, "dur": 0.953, + "args": { + "External id": 236997,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056890.331, "dur": 3.652, + "args": { + "External id": 236998,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056893.062, "dur": 0.856, + "args": { + "External id": 236999,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056894.255, "dur": 1.204, + "args": { + "External id": 237000,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056894.824, "dur": 0.571, + "args": { + "External id": 237001,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056895.744, "dur": 1.750, + "args": { + "External id": 237002,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056896.549, "dur": 0.882, + "args": { + "External id": 237003,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056897.740, "dur": 2.831, + "args": { + "External id": 237004,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056899.810, "dur": 0.692, + "args": { + "External id": 237005,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056900.812, "dur": 1.510, + "args": { + "External id": 237006,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056901.595, "dur": 0.657, + "args": { + "External id": 237007,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056902.574, "dur": 2.299, + "args": { + "External id": 237008,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056903.333, "dur": 1.471, + "args": { + "External id": 237009,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5327098056907.604, "dur": 1.464, + "args": { + "External id": 237010,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098056908.440, "dur": 0.555, + "args": { + "External id": 237011,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070552, "tid": 2070552, + "ts": 5327098056927.709, "dur": 147.690, + "args": { + "External id": 237012,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070552, "tid": 2070552, + "ts": 5327098057173.740, "dur": 123.360, + "args": { + "External id": 237013,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070552, "tid": 2070552, + "ts": 5327098057230.554, "dur": 48.118, + "args": { + "External id": 237014,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5327098057244.897, "dur": 1.004, + "args": { + "External id": 237015,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2070552, "tid": 2070552, + "ts": 5327098057696.420, "dur": 900.099, + "args": { + "External id": 237016,"Sequence number": 959181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070552, "tid": 2070552, + "ts": 5327098057740.172, "dur": 57.872, + "args": { + "External id": 237017,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098057744.262, "dur": 1.342, + "args": { + "External id": 237018,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098057747.408, "dur": 0.619, + "args": { + "External id": 237019,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2070552, "tid": 2070552, + "ts": 5327098057819.198, "dur": 492.926, + "args": { + "External id": 237020,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070552, "tid": 2070552, + "ts": 5327098057822.759, "dur": 44.964, + "args": { + "External id": 237021,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5327098057826.032, "dur": 10.338, + "args": { + "External id": 237022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327098057831.634, "dur": 4.006, + "args": { + "External id": 237023,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5327098057837.992, "dur": 29.158, + "args": { + "External id": 237024,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2070552, "tid": 2070552, + "ts": 5327098057875.859, "dur": 433.212, + "args": { + "External id": 237025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327098057906.921, "dur": 394.569, + "args": { + "External id": 237026,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9390, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2070552, "tid": 2070552, + "ts": 5327098057925.317, "dur": 369.964, + "args": { + "External id": 237027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070552, "tid": 2070552, + "ts": 5327098058371.479, "dur": 187.512, + "args": { + "External id": 237028,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2070552, "tid": 2070552, + "ts": 5327098058449.410, "dur": 31.978, + "args": { + "External id": 237029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5327098058468.228, "dur": 4.043, + "args": { + "External id": 237030,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9394, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070552, "tid": 2070552, + "ts": 5327098058511.110, "dur": 41.679, + "args": { + "External id": 237031,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098058513.710, "dur": 1.221, + "args": { + "External id": 237032,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098058516.142, "dur": 0.643, + "args": { + "External id": 237033,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2070552, "tid": 2070552, + "ts": 5327098058613.211, "dur": 62.851, + "args": { + "External id": 237034,"Sequence number": 959182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5327098058661.191, "dur": 9.902, + "args": { + "External id": 237035,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5327098058664.025, "dur": 6.723, + "args": { + "External id": 237036,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5327098059135.845, "dur": 47.250, + "args": { + "External id": 237037,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2070552, "tid": 2070552, + "ts": 5327098059192.661, "dur": 28.769, + "args": { + "External id": 237038,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2070552, "tid": 2070552, + "ts": 5327098059230.172, "dur": 20.294, + "args": { + "External id": 237039,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2070552, "tid": 2070552, + "ts": 5327098059264.410, "dur": 22.842, + "args": { + "External id": 237040,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098059267.265, "dur": 0.762, + "args": { + "External id": 237041,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5327098059304.579, "dur": 0.526, + "args": { + "External id": 237042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070552, "tid": 2070552, + "ts": 5327098059426.148, "dur": 892.615, + "args": { + "External id": 237043,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070552, "tid": 2070552, + "ts": 5327098059951.149, "dur": 335.643, + "args": { + "External id": 237044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2070552, "tid": 2070552, + "ts": 5327098060364.728, "dur": 31.214, + "args": { + "External id": 237045,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2070552, + "ts": 5327098060368.454, "dur": 26.960, + "args": { + "External id": 237046,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2070552, + "ts": 5327098060399.966, "dur": 313.144, + "args": { + "External id": 237047,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2070552, + "ts": 5327098060401.623, "dur": 311.160, + "args": { + "External id": 237048,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2070552, + "ts": 5327098060403.376, "dur": 308.001, + "args": { + "External id": 237049,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2070552, "tid": 2070552, + "ts": 5327098060721.396, "dur": 66.792, + "args": { + "External id": 237050,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327098060724.303, "dur": 36.786, + "args": { + "External id": 237051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5327098060729.941, "dur": 3.624, + "args": { + "External id": 237052,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5327098060735.562, "dur": 25.134, + "args": { + "External id": 237053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5327098060741.032, "dur": 2.897, + "args": { + "External id": 237054,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5327098060762.839, "dur": 24.516, + "args": { + "External id": 237055,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2070552, + "ts": 5327098060791.132, "dur": 62.941, + "args": { + "External id": 237056,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2070552, + "ts": 5327098060817.560, "dur": 36.364, + "args": { + "External id": 237057,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2070552, + "ts": 5327098060819.069, "dur": 34.435, + "args": { + "External id": 237058,"Sequence number": 959183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2070552, "tid": 2070552, + "ts": 5327098060892.737, "dur": 5458.950, + "args": { + "External id": 237059,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2070552, "tid": 2070552, + "ts": 5327098060924.597, "dur": 5407.505, + "args": { + "External id": 237060,"Record function id": 0, "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2070552, "tid": 2070552, + "ts": 5327098062075.498, "dur": 248.012, + "args": { + "External id": 237061,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062095.808, "dur": 1.397, + "args": { + "External id": 237062,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062098.385, "dur": 0.056, + "args": { + "External id": 237063,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062098.841, "dur": 0.265, + "args": { + "External id": 237064,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062099.501, "dur": 0.080, + "args": { + "External id": 237065,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062100.041, "dur": 0.287, + "args": { + "External id": 237066,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062100.723, "dur": 0.080, + "args": { + "External id": 237067,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062101.322, "dur": 0.234, + "args": { + "External id": 237068,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062101.935, "dur": 0.267, + "args": { + "External id": 237069,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062102.548, "dur": 0.295, + "args": { + "External id": 237070,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062103.214, "dur": 0.064, + "args": { + "External id": 237071,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062103.721, "dur": 0.065, + "args": { + "External id": 237072,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062104.130, "dur": 0.061, + "args": { + "External id": 237073,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062104.597, "dur": 0.063, + "args": { + "External id": 237074,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062105.067, "dur": 0.053, + "args": { + "External id": 237075,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062105.632, "dur": 0.067, + "args": { + "External id": 237076,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062105.994, "dur": 0.064, + "args": { + "External id": 237077,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062106.394, "dur": 0.294, + "args": { + "External id": 237078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062107.108, "dur": 0.066, + "args": { + "External id": 237079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062107.594, "dur": 0.076, + "args": { + "External id": 237080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062108.021, "dur": 0.066, + "args": { + "External id": 237081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062108.448, "dur": 0.270, + "args": { + "External id": 237082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062109.092, "dur": 0.297, + "args": { + "External id": 237083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062109.756, "dur": 0.067, + "args": { + "External id": 237084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062110.143, "dur": 0.407, + "args": { + "External id": 237085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062110.917, "dur": 0.083, + "args": { + "External id": 237086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062111.420, "dur": 0.063, + "args": { + "External id": 237087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062111.859, "dur": 0.088, + "args": { + "External id": 237088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062112.310, "dur": 0.062, + "args": { + "External id": 237089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062112.730, "dur": 0.067, + "args": { + "External id": 237090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062113.219, "dur": 0.066, + "args": { + "External id": 237091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062113.790, "dur": 0.064, + "args": { + "External id": 237092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062114.223, "dur": 0.071, + "args": { + "External id": 237093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062114.693, "dur": 0.060, + "args": { + "External id": 237094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062115.134, "dur": 0.065, + "args": { + "External id": 237095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062115.560, "dur": 0.063, + "args": { + "External id": 237096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062116.028, "dur": 0.062, + "args": { + "External id": 237097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062116.493, "dur": 0.060, + "args": { + "External id": 237098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062116.923, "dur": 0.065, + "args": { + "External id": 237099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062117.263, "dur": 0.061, + "args": { + "External id": 237100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062117.612, "dur": 0.092, + "args": { + "External id": 237101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062118.033, "dur": 0.069, + "args": { + "External id": 237102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062118.517, "dur": 0.063, + "args": { + "External id": 237103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062118.921, "dur": 0.080, + "args": { + "External id": 237104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062119.419, "dur": 0.262, + "args": { + "External id": 237105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062119.968, "dur": 0.254, + "args": { + "External id": 237106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062120.640, "dur": 0.292, + "args": { + "External id": 237107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062121.353, "dur": 0.100, + "args": { + "External id": 237108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062121.882, "dur": 0.284, + "args": { + "External id": 237109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062122.518, "dur": 0.097, + "args": { + "External id": 237110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062122.966, "dur": 0.063, + "args": { + "External id": 237111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062123.337, "dur": 0.061, + "args": { + "External id": 237112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062123.756, "dur": 0.065, + "args": { + "External id": 237113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062124.117, "dur": 0.063, + "args": { + "External id": 237114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062124.615, "dur": 0.051, + "args": { + "External id": 237115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062125.030, "dur": 0.070, + "args": { + "External id": 237116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062125.439, "dur": 0.067, + "args": { + "External id": 237117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062125.803, "dur": 0.063, + "args": { + "External id": 237118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062126.253, "dur": 0.063, + "args": { + "External id": 237119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062126.590, "dur": 0.063, + "args": { + "External id": 237120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062127.052, "dur": 0.065, + "args": { + "External id": 237121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062127.421, "dur": 0.065, + "args": { + "External id": 237122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062127.893, "dur": 0.065, + "args": { + "External id": 237123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062128.506, "dur": 0.064, + "args": { + "External id": 237124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062128.854, "dur": 0.074, + "args": { + "External id": 237125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062129.379, "dur": 0.061, + "args": { + "External id": 237126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062129.818, "dur": 0.062, + "args": { + "External id": 237127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062130.355, "dur": 0.064, + "args": { + "External id": 237128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062130.810, "dur": 0.053, + "args": { + "External id": 237129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062131.210, "dur": 0.064, + "args": { + "External id": 237130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062131.681, "dur": 0.058, + "args": { + "External id": 237131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062132.310, "dur": 0.069, + "args": { + "External id": 237132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062132.766, "dur": 0.092, + "args": { + "External id": 237133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062133.269, "dur": 0.070, + "args": { + "External id": 237134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062133.735, "dur": 0.066, + "args": { + "External id": 237135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062134.179, "dur": 0.065, + "args": { + "External id": 237136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062134.626, "dur": 0.265, + "args": { + "External id": 237137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062135.263, "dur": 0.266, + "args": { + "External id": 237138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062135.906, "dur": 0.293, + "args": { + "External id": 237139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062136.592, "dur": 0.290, + "args": { + "External id": 237140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062137.181, "dur": 0.331, + "args": { + "External id": 237141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062137.852, "dur": 0.069, + "args": { + "External id": 237142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062138.277, "dur": 0.063, + "args": { + "External id": 237143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062138.728, "dur": 0.062, + "args": { + "External id": 237144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062139.145, "dur": 0.069, + "args": { + "External id": 237145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062139.657, "dur": 0.055, + "args": { + "External id": 237146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062140.036, "dur": 0.069, + "args": { + "External id": 237147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062140.477, "dur": 0.069, + "args": { + "External id": 237148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062140.928, "dur": 0.121, + "args": { + "External id": 237149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062141.437, "dur": 0.287, + "args": { + "External id": 237150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062142.085, "dur": 0.066, + "args": { + "External id": 237151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062142.470, "dur": 0.283, + "args": { + "External id": 237152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062143.128, "dur": 0.066, + "args": { + "External id": 237153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062143.528, "dur": 0.063, + "args": { + "External id": 237154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062143.902, "dur": 0.063, + "args": { + "External id": 237155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062144.301, "dur": 0.065, + "args": { + "External id": 237156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062144.746, "dur": 0.066, + "args": { + "External id": 237157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062145.163, "dur": 0.064, + "args": { + "External id": 237158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062145.578, "dur": 0.066, + "args": { + "External id": 237159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062146.193, "dur": 0.057, + "args": { + "External id": 237160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062146.584, "dur": 0.050, + "args": { + "External id": 237161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062146.972, "dur": 0.066, + "args": { + "External id": 237162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062147.400, "dur": 0.058, + "args": { + "External id": 237163,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062147.820, "dur": 0.068, + "args": { + "External id": 237164,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062148.267, "dur": 0.066, + "args": { + "External id": 237165,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062148.715, "dur": 0.063, + "args": { + "External id": 237166,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062149.101, "dur": 0.064, + "args": { + "External id": 237167,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062149.567, "dur": 0.063, + "args": { + "External id": 237168,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062149.974, "dur": 0.063, + "args": { + "External id": 237169,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062150.360, "dur": 0.065, + "args": { + "External id": 237170,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062150.697, "dur": 0.062, + "args": { + "External id": 237171,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062151.105, "dur": 0.067, + "args": { + "External id": 237172,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062151.481, "dur": 0.066, + "args": { + "External id": 237173,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062151.929, "dur": 0.063, + "args": { + "External id": 237174,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062152.350, "dur": 0.060, + "args": { + "External id": 237175,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062152.738, "dur": 0.067, + "args": { + "External id": 237176,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062153.178, "dur": 0.063, + "args": { + "External id": 237177,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062153.610, "dur": 0.063, + "args": { + "External id": 237178,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062154.150, "dur": 0.321, + "args": { + "External id": 237179,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062154.906, "dur": 0.301, + "args": { + "External id": 237180,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062155.575, "dur": 0.302, + "args": { + "External id": 237181,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062156.278, "dur": 0.270, + "args": { + "External id": 237182,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062156.915, "dur": 0.066, + "args": { + "External id": 237183,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062157.245, "dur": 0.281, + "args": { + "External id": 237184,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062157.883, "dur": 0.294, + "args": { + "External id": 237185,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062158.557, "dur": 0.252, + "args": { + "External id": 237186,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062159.182, "dur": 0.302, + "args": { + "External id": 237187,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062159.786, "dur": 0.304, + "args": { + "External id": 237188,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062160.445, "dur": 0.271, + "args": { + "External id": 237189,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062160.988, "dur": 0.064, + "args": { + "External id": 237190,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062161.430, "dur": 0.053, + "args": { + "External id": 237191,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062161.832, "dur": 0.067, + "args": { + "External id": 237192,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062162.334, "dur": 0.056, + "args": { + "External id": 237193,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062162.785, "dur": 0.068, + "args": { + "External id": 237194,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062163.201, "dur": 0.063, + "args": { + "External id": 237195,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062163.585, "dur": 0.056, + "args": { + "External id": 237196,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062164.010, "dur": 0.062, + "args": { + "External id": 237197,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062164.417, "dur": 0.067, + "args": { + "External id": 237198,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062164.780, "dur": 0.067, + "args": { + "External id": 237199,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062165.180, "dur": 0.064, + "args": { + "External id": 237200,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062165.637, "dur": 0.067, + "args": { + "External id": 237201,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062166.007, "dur": 0.067, + "args": { + "External id": 237202,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062166.401, "dur": 0.069, + "args": { + "External id": 237203,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062166.752, "dur": 0.065, + "args": { + "External id": 237204,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062167.157, "dur": 0.069, + "args": { + "External id": 237205,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062167.614, "dur": 0.066, + "args": { + "External id": 237206,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062168.006, "dur": 0.066, + "args": { + "External id": 237207,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062168.702, "dur": 0.068, + "args": { + "External id": 237208,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062169.350, "dur": 0.065, + "args": { + "External id": 237209,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062169.811, "dur": 0.066, + "args": { + "External id": 237210,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062170.177, "dur": 0.069, + "args": { + "External id": 237211,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062170.541, "dur": 0.067, + "args": { + "External id": 237212,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062170.954, "dur": 0.066, + "args": { + "External id": 237213,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062171.482, "dur": 0.063, + "args": { + "External id": 237214,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062171.916, "dur": 0.065, + "args": { + "External id": 237215,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062172.378, "dur": 0.064, + "args": { + "External id": 237216,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062172.805, "dur": 0.063, + "args": { + "External id": 237217,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062173.221, "dur": 0.410, + "args": { + "External id": 237218,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062173.970, "dur": 0.293, + "args": { + "External id": 237219,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062174.666, "dur": 0.084, + "args": { + "External id": 237220,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062175.152, "dur": 0.295, + "args": { + "External id": 237221,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062175.774, "dur": 0.282, + "args": { + "External id": 237222,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062176.456, "dur": 0.060, + "args": { + "External id": 237223,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062176.885, "dur": 0.247, + "args": { + "External id": 237224,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062177.583, "dur": 0.082, + "args": { + "External id": 237225,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062178.018, "dur": 0.258, + "args": { + "External id": 237226,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062178.659, "dur": 0.104, + "args": { + "External id": 237227,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062179.122, "dur": 0.106, + "args": { + "External id": 237228,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062179.583, "dur": 0.100, + "args": { + "External id": 237229,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062180.058, "dur": 0.099, + "args": { + "External id": 237230,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062180.590, "dur": 0.064, + "args": { + "External id": 237231,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062181.079, "dur": 0.073, + "args": { + "External id": 237232,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062189.060, "dur": 0.078, + "args": { + "External id": 237233,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062189.842, "dur": 0.062, + "args": { + "External id": 237234,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062190.329, "dur": 0.055, + "args": { + "External id": 237235,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062190.757, "dur": 0.068, + "args": { + "External id": 237236,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062191.198, "dur": 0.066, + "args": { + "External id": 237237,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062191.665, "dur": 0.063, + "args": { + "External id": 237238,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062192.031, "dur": 0.065, + "args": { + "External id": 237239,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062192.498, "dur": 0.064, + "args": { + "External id": 237240,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062192.840, "dur": 0.074, + "args": { + "External id": 237241,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062193.265, "dur": 0.069, + "args": { + "External id": 237242,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062193.682, "dur": 0.087, + "args": { + "External id": 237243,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062194.140, "dur": 0.104, + "args": { + "External id": 237244,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062194.645, "dur": 0.086, + "args": { + "External id": 237245,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062195.135, "dur": 0.294, + "args": { + "External id": 237246,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062195.780, "dur": 0.066, + "args": { + "External id": 237247,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062196.253, "dur": 0.088, + "args": { + "External id": 237248,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062196.742, "dur": 0.076, + "args": { + "External id": 237249,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062197.243, "dur": 0.064, + "args": { + "External id": 237250,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062197.688, "dur": 0.093, + "args": { + "External id": 237251,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062198.120, "dur": 0.194, + "args": { + "External id": 237252,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062198.716, "dur": 0.057, + "args": { + "External id": 237253,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062199.108, "dur": 0.061, + "args": { + "External id": 237254,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062199.537, "dur": 0.055, + "args": { + "External id": 237255,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062199.956, "dur": 0.066, + "args": { + "External id": 237256,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062200.318, "dur": 0.064, + "args": { + "External id": 237257,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062200.689, "dur": 0.062, + "args": { + "External id": 237258,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062201.084, "dur": 0.067, + "args": { + "External id": 237259,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062201.535, "dur": 0.068, + "args": { + "External id": 237260,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062201.942, "dur": 0.068, + "args": { + "External id": 237261,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062202.329, "dur": 0.064, + "args": { + "External id": 237262,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062202.742, "dur": 0.050, + "args": { + "External id": 237263,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062203.092, "dur": 0.056, + "args": { + "External id": 237264,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062203.443, "dur": 0.063, + "args": { + "External id": 237265,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062203.785, "dur": 0.065, + "args": { + "External id": 237266,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062204.267, "dur": 0.067, + "args": { + "External id": 237267,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062204.648, "dur": 0.067, + "args": { + "External id": 237268,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062205.050, "dur": 0.068, + "args": { + "External id": 237269,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062205.431, "dur": 0.070, + "args": { + "External id": 237270,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062205.814, "dur": 0.055, + "args": { + "External id": 237271,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062206.250, "dur": 0.066, + "args": { + "External id": 237272,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062206.630, "dur": 0.065, + "args": { + "External id": 237273,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062207.094, "dur": 0.048, + "args": { + "External id": 237274,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062207.465, "dur": 0.281, + "args": { + "External id": 237275,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062208.161, "dur": 0.298, + "args": { + "External id": 237276,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062208.788, "dur": 0.300, + "args": { + "External id": 237277,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062209.385, "dur": 0.269, + "args": { + "External id": 237278,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062210.024, "dur": 0.066, + "args": { + "External id": 237279,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062210.541, "dur": 0.071, + "args": { + "External id": 237280,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062210.992, "dur": 0.269, + "args": { + "External id": 237281,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062211.594, "dur": 0.258, + "args": { + "External id": 237282,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062212.234, "dur": 0.282, + "args": { + "External id": 237283,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062212.902, "dur": 0.090, + "args": { + "External id": 237284,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062213.353, "dur": 0.067, + "args": { + "External id": 237285,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062213.763, "dur": 0.066, + "args": { + "External id": 237286,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062214.160, "dur": 0.062, + "args": { + "External id": 237287,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062214.543, "dur": 0.066, + "args": { + "External id": 237288,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062214.940, "dur": 0.062, + "args": { + "External id": 237289,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062215.341, "dur": 0.065, + "args": { + "External id": 237290,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062215.768, "dur": 0.068, + "args": { + "External id": 237291,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062216.087, "dur": 0.063, + "args": { + "External id": 237292,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062216.675, "dur": 0.069, + "args": { + "External id": 237293,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062217.104, "dur": 0.057, + "args": { + "External id": 237294,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062217.471, "dur": 0.063, + "args": { + "External id": 237295,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062217.795, "dur": 0.096, + "args": { + "External id": 237296,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062218.319, "dur": 0.343, + "args": { + "External id": 237297,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062218.909, "dur": 0.266, + "args": { + "External id": 237298,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062219.536, "dur": 0.084, + "args": { + "External id": 237299,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062219.864, "dur": 0.086, + "args": { + "External id": 237300,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062220.334, "dur": 0.088, + "args": { + "External id": 237301,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062220.674, "dur": 0.254, + "args": { + "External id": 237302,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062221.307, "dur": 0.065, + "args": { + "External id": 237303,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062221.656, "dur": 0.063, + "args": { + "External id": 237304,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062222.133, "dur": 0.064, + "args": { + "External id": 237305,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062222.441, "dur": 0.052, + "args": { + "External id": 237306,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062222.835, "dur": 0.068, + "args": { + "External id": 237307,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062223.147, "dur": 0.053, + "args": { + "External id": 237308,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062223.534, "dur": 0.061, + "args": { + "External id": 237309,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062223.872, "dur": 0.057, + "args": { + "External id": 237310,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062224.335, "dur": 0.052, + "args": { + "External id": 237311,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062224.634, "dur": 0.049, + "args": { + "External id": 237312,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062225.073, "dur": 0.063, + "args": { + "External id": 237313,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062225.385, "dur": 0.053, + "args": { + "External id": 237314,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062225.723, "dur": 0.064, + "args": { + "External id": 237315,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062226.034, "dur": 0.050, + "args": { + "External id": 237316,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062226.428, "dur": 0.061, + "args": { + "External id": 237317,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062226.755, "dur": 0.054, + "args": { + "External id": 237318,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062227.272, "dur": 0.065, + "args": { + "External id": 237319,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062227.580, "dur": 0.060, + "args": { + "External id": 237320,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062227.998, "dur": 0.067, + "args": { + "External id": 237321,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062228.309, "dur": 0.055, + "args": { + "External id": 237322,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062228.673, "dur": 0.065, + "args": { + "External id": 237323,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062228.979, "dur": 0.054, + "args": { + "External id": 237324,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062229.316, "dur": 0.062, + "args": { + "External id": 237325,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062229.623, "dur": 0.053, + "args": { + "External id": 237326,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062230.020, "dur": 0.062, + "args": { + "External id": 237327,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062230.325, "dur": 0.055, + "args": { + "External id": 237328,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062230.747, "dur": 0.064, + "args": { + "External id": 237329,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062231.056, "dur": 0.054, + "args": { + "External id": 237330,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062231.506, "dur": 0.063, + "args": { + "External id": 237331,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062231.834, "dur": 0.053, + "args": { + "External id": 237332,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062232.269, "dur": 0.063, + "args": { + "External id": 237333,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062232.603, "dur": 0.050, + "args": { + "External id": 237334,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062233.077, "dur": 0.061, + "args": { + "External id": 237335,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062233.388, "dur": 0.054, + "args": { + "External id": 237336,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062233.724, "dur": 0.063, + "args": { + "External id": 237337,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062234.032, "dur": 0.053, + "args": { + "External id": 237338,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062234.462, "dur": 0.064, + "args": { + "External id": 237339,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062234.772, "dur": 0.053, + "args": { + "External id": 237340,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062235.107, "dur": 0.064, + "args": { + "External id": 237341,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062235.416, "dur": 0.052, + "args": { + "External id": 237342,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062235.816, "dur": 0.064, + "args": { + "External id": 237343,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062236.125, "dur": 0.053, + "args": { + "External id": 237344,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062236.513, "dur": 0.067, + "args": { + "External id": 237345,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062236.835, "dur": 0.054, + "args": { + "External id": 237346,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062241.492, "dur": 0.054, + "args": { + "External id": 237347,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062241.978, "dur": 0.062, + "args": { + "External id": 237348,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062242.589, "dur": 0.069, + "args": { + "External id": 237349,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062243.032, "dur": 0.061, + "args": { + "External id": 237350,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062243.479, "dur": 0.061, + "args": { + "External id": 237351,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062243.899, "dur": 0.063, + "args": { + "External id": 237352,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5327098062244.333, "dur": 0.063, + "args": { + "External id": 237353,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070552, "tid": 2070552, + "ts": 5327098062821.843, "dur": 3421.225, + "args": { + "External id": 237354,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "4.1442628849528264e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070552, "tid": 2070552, + "ts": 5327098065712.705, "dur": 390.993, + "args": { + "External id": 237355,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "4.1442628849528264e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9719 + } + }, + { + "name": "process_name", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 0, + "args": { + "sort_index": 2070552 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 2070552, + "args": { + "name": "thread 2070552 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 2070552, + "args": { + "sort_index": 2070552 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 2107648, + "args": { + "name": "thread 2107648 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 2107648, + "args": { + "sort_index": 2107648 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 2107648, + "args": { + "name": "thread 2107648 (pt_autograd_5)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5327096018666.319, "pid": 2070552, "tid": 2107648, + "args": { + "sort_index": 2107648 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 5327096018570.798, "dur": 2049508.707, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5327096018570.798, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 5327096018570.798 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 5327098109570.496 + } + ], + "traceName": "exp/top.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace/iteration_11776/rank5_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file